In [6]:
import pandas as pd
from sklearn.preprocessing import LabelEncoder

# Step 1: Load the CSV file
df = pd.read_csv('1.csv')

# Step 2: Report the number of missing values per column
print("Null values in each column:")
print(df.isnull().sum())

# Step 3: Drop any row that contains a missing value
# (a no-op when the counts printed above are all zero)
df = df.dropna()

# Step 4: Convert categorical columns to numeric codes.
# A separate encoder is kept per column so each mapping can be inspected
# (encoder.classes_) or reversed (encoder.inverse_transform) later.
gender_encoder = LabelEncoder()
eye_encoder = LabelEncoder()

# LabelEncoder numbers the classes in sorted order of their labels,
# so for Female/Male labels: Female -> 0, Male -> 1.
df['Gender'] = gender_encoder.fit_transform(df['Gender'])

# Sorted order also applies here. Assuming the labels are Blue, Brown, Gray,
# Green and Hazel (verify with eye_encoder.classes_), the codes are
# Blue -> 0, Brown -> 1, Gray -> 2, Green -> 3, Hazel -> 4.
# The codes are arbitrary category IDs, not an ordering of the colours.
df['EyeColor'] = eye_encoder.fit_transform(df['EyeColor'])

# Step 5: Check the final DataFrame
print("\nFirst 10 rows of the numeric dataset:")
print(df.head(10))

# Step 6: Check info to confirm all columns are numeric.
# DataFrame.info() writes its report to stdout itself and returns None,
# so it must not be wrapped in print() (that would emit a stray "None").
print("\nDataFrame info:")
df.info()


Null values in each column:
Age         0
Gender      0
EyeColor    0
dtype: int64

First 10 rows of the numeric dataset:
   Age  Gender  EyeColor
0   25       1         0
1   30       0         1
2   22       1         3
3   28       0         4
4   35       1         2
5   40       0         0
6   27       1         1
7   32       0         3
8   24       1         4
9   29       0         2

DataFrame info:
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 50 entries, 0 to 49
Data columns (total 3 columns):
 #   Column    Non-Null Count  Dtype
---  ------    --------------  -----
 0   Age       50 non-null     int64
 1   Gender    50 non-null     int64
 2   EyeColor  50 non-null     int64
dtypes: int64(3)
memory usage: 1.3 KB
None


In [8]:
# Render the entire DataFrame as one string (to_string() does not truncate
# rows the way the default repr does), then write it to the cell output.
full_table = df.to_string()
print(full_table)



    Age  Gender  EyeColor
0    25       1         0
1    30       0         1
2    22       1         3
3    28       0         4
4    35       1         2
5    40       0         0
6    27       1         1
7    32       0         3
8    24       1         4
9    29       0         2
10   31       1         0
11   26       0         1
12   23       1         3
13   34       0         4
14   38       1         2
15   21       0         0
16   33       1         1
17   37       0         3
18   36       1         4
19   39       0         2
20   28       1         0
21   25       0         1
22   30       1         3
23   32       0         4
24   29       1         2
25   31       0         0
26   27       1         1
27   26       0         3
28   35       1         4
29   33       0         2
30   22       1         0
31   24       0         1
32   36       1         3
33   38       0         4
34   21       1         2
35   23       0         0
36   39       1         1
37   40     

In [9]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder

# Load CSV afresh so this cell does not depend on kernel state, and drop
# rows with missing values before encoding: a NaN in a categorical column
# would otherwise make LabelEncoder fail or be encoded as a spurious class.
df = pd.read_csv('1.csv')
df = df.dropna()

# Encode categorical columns (LabelEncoder assigns codes in sorted label order)
le_gender = LabelEncoder()
le_eye = LabelEncoder()
df['Gender'] = le_gender.fit_transform(df['Gender'])
df['EyeColor'] = le_eye.fit_transform(df['EyeColor'])

# Features and target
X = df[['Age', 'Gender']]  # Features
y = df['EyeColor']         # Target (encoded category ID)

# Hold out 20% of the rows for evaluation; fixed seed for a reproducible split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Initialize and train regression model
# NOTE(review): EyeColor codes are arbitrary category IDs, so a linear
# regression treats unordered classes as magnitudes. MSE / R2 below measure
# distance between IDs, not classification quality; a classifier with
# accuracy-style metrics is the appropriate tool if the goal is to predict
# eye colour.
reg = LinearRegression()
reg.fit(X_train, y_train)

# Predict (continuous values, not class IDs)
y_pred = reg.predict(X_test)

# Round to the nearest class ID and clamp to the valid encoded range so that
# an extrapolated prediction can never name a class that does not exist.
max_code = len(le_eye.classes_) - 1
rounded_pred = [min(max(round(i), 0), max_code) for i in y_pred]

# Evaluate
print("Predictions:", y_pred)
print("Rounded Predictions:", rounded_pred)
print("MSE:", mean_squared_error(y_test, y_pred))
print("R2 Score:", r2_score(y_test, y_pred))


Predictions: [2.32843808 2.51784737 1.51294755 2.1390288  2.51784737 2.33372112
 1.82862969 2.1390288  2.39685755 2.64412022]
Rounded Predictions: [2, 3, 2, 2, 3, 2, 2, 2, 2, 3]
MSE: 1.8976439137587477
R2 Score: 0.17133453547652944
