In [2]:
import numpy as np
import pandas as pd
from sklearn.linear_model import Perceptron
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score


In [3]:
# Load the Iris dataset
df = pd.read_csv('https://raw.githubusercontent.com/mwaskom/seaborn-data/master/iris.csv')


In [4]:
df.head()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width,species
0,5.1,3.5,1.4,0.2,setosa
1,4.9,3.0,1.4,0.2,setosa
2,4.7,3.2,1.3,0.2,setosa
3,4.6,3.1,1.5,0.2,setosa
4,5.0,3.6,1.4,0.2,setosa


In [5]:
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 150 entries, 0 to 149
Data columns (total 5 columns):
 #   Column        Non-Null Count  Dtype  
---  ------        --------------  -----  
 0   sepal_length  150 non-null    float64
 1   sepal_width   150 non-null    float64
 2   petal_length  150 non-null    float64
 3   petal_width   150 non-null    float64
 4   species       150 non-null    object 
dtypes: float64(4), object(1)
memory usage: 6.0+ KB


In [6]:
df.describe()

Unnamed: 0,sepal_length,sepal_width,petal_length,petal_width
count,150.0,150.0,150.0,150.0
mean,5.843333,3.057333,3.758,1.199333
std,0.828066,0.435866,1.765298,0.762238
min,4.3,2.0,1.0,0.1
25%,5.1,2.8,1.6,0.3
50%,5.8,3.0,4.35,1.3
75%,6.4,3.3,5.1,1.8
max,7.9,4.4,6.9,2.5


In [7]:
df['species'].value_counts()

species
setosa        50
versicolor    50
virginica     50
Name: count, dtype: int64

In [8]:
# Encode the species to numeric type
species_mapping = {'setosa': 0, 'versicolor': 1, 'virginica': 2}
df['species_encoded'] = df['species'].map(species_mapping)

In [9]:
# For a binary classification problem, select any two classes
X = df.iloc[:, :4][df['species_encoded'] < 2]
y = df['species_encoded'][df['species_encoded'] < 2]

In [10]:
# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)  # Added random_state for reproducibility


## Perceptron

https://scikit-learn.org/1.4/modules/generated/sklearn.linear_model.Perceptron.html#sklearn.linear_model.Perceptron

In [11]:
# Create a Perceptron model
perceptron = Perceptron()

# Train the model
perceptron.fit(X_train, y_train)

# Make predictions
y_pred = perceptron.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Accuracy: {accuracy}")


# Example of accessing the learned weights and bias (intercept)
print(f"Weights: {perceptron.coef_}")
print(f"Bias: {perceptron.intercept_}")

Accuracy: 1.0
Weights: [[-1.3 -4.5  6.8  3.1]]
Bias: [-1.]


## Adaline

https://scikit-learn.org/1.4/modules/generated/sklearn.linear_model.SGDClassifier.html#sklearn.linear_model.SGDClassifier

In [12]:
# Initialize and train the model with squared_loss (this behaves like ADALINE)
adaline = SGDClassifier(loss='squared_error', max_iter=1000, tol=1e-3, random_state=42)

adaline.fit(X_train, y_train)

# Make predictions
y_pred = adaline.predict(X_test)

# Evaluate the model
accuracy = adaline.score(X_test, y_test)
print(f"Accuracy: {accuracy}")

Accuracy: 0.4


## Your work

- choose two species from the penguin dataset below
- repeat the steps above
- try another parameters and observe the result

Penguin dataset

https://raw.githubusercontent.com/mwaskom/seaborn-data/refs/heads/master/penguins.csv

**Finish the notebook1.ipynb, send to zwu009@citymail.cuny.edu by 5:00 pm Feb 6, 2025 along with your quiz answer. **


In [13]:
import numpy as np
import pandas as pd
from sklearn.linear_model import Perceptron
from sklearn.linear_model import SGDClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [14]:
# Load the Penguin dataset
df = pd.read_csv("https://raw.githubusercontent.com/mwaskom/seaborn-data/refs/heads/master/penguins.csv")


In [15]:
# Display dataset info
df.head()


Unnamed: 0,species,island,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g,sex
0,Adelie,Torgersen,39.1,18.7,181.0,3750.0,MALE
1,Adelie,Torgersen,39.5,17.4,186.0,3800.0,FEMALE
2,Adelie,Torgersen,40.3,18.0,195.0,3250.0,FEMALE
3,Adelie,Torgersen,,,,,
4,Adelie,Torgersen,36.7,19.3,193.0,3450.0,FEMALE


In [16]:
df.info()# Display dataset info



<class 'pandas.core.frame.DataFrame'>
RangeIndex: 344 entries, 0 to 343
Data columns (total 7 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   species            344 non-null    object 
 1   island             344 non-null    object 
 2   bill_length_mm     342 non-null    float64
 3   bill_depth_mm      342 non-null    float64
 4   flipper_length_mm  342 non-null    float64
 5   body_mass_g        342 non-null    float64
 6   sex                333 non-null    object 
dtypes: float64(4), object(3)
memory usage: 18.9+ KB


In [17]:
df.describe()# Display dataset info



Unnamed: 0,bill_length_mm,bill_depth_mm,flipper_length_mm,body_mass_g
count,342.0,342.0,342.0,342.0
mean,43.92193,17.15117,200.915205,4201.754386
std,5.459584,1.974793,14.061714,801.954536
min,32.1,13.1,172.0,2700.0
25%,39.225,15.6,190.0,3550.0
50%,44.45,17.3,197.0,4050.0
75%,48.5,18.7,213.0,4750.0
max,59.6,21.5,231.0,6300.0


In [18]:
df['species'].value_counts()


species
Adelie       152
Gentoo       124
Chinstrap     68
Name: count, dtype: int64

In [19]:
# Encode the species to numeric type
species_mapping = {'Adelie': 0, 'Gentoo': 1, 'Chinstrap': 2}
df['species_encoded'] = df['species'].map(species_mapping)

In [20]:
# Drop rows with missing values to avoid errors
df = df.dropna()

In [21]:
# For a binary classification problem, select any two species (e.g., Adelie and Gentoo)
X = df.iloc[:, 2:6][df['species_encoded'] < 2]  # Select numerical columns only
y = df['species_encoded'][df['species_encoded'] < 2]

In [22]:
# Split data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)  # Added random_state for reproducibility


In [23]:
# Create a Perceptron model
perceptron = Perceptron()

# Train the model
perceptron.fit(X_train, y_train)

# Make predictions
y_pred = perceptron.predict(X_test)

# Evaluate the model
accuracy = accuracy_score(y_test, y_pred)
print(f"Perceptron Accuracy: {accuracy}")

# Example of accessing the learned weights and bias (intercept)
print(f"Weights: {perceptron.coef_}")
print(f"Bias: {perceptron.intercept_}")


Perceptron Accuracy: 0.37735849056603776
Weights: [[ -4229.5  -7298.2 -31017.    5325. ]]
Bias: [-265.]


In [24]:
## Adaline

# Initialize and train the model with squared_loss (this behaves like ADALINE)
adaline = SGDClassifier(loss='squared_error', max_iter=1000, tol=1e-3, random_state=42)

adaline.fit(X_train, y_train)

# Make predictions
y_pred = adaline.predict(X_test)

# Evaluate the model
accuracy = adaline.score(X_test, y_test)
print(f"Adaline Accuracy: {accuracy}")

Adaline Accuracy: 0.6226415094339622
