<a href="https://colab.research.google.com/github/lalitpatil-7/localrepo/blob/main/labassignment1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import LabelEncoder

# Load the dataset.
# DATA_PATH is kept in one place so the error message always names the file
# that was actually requested.
DATA_PATH = '/content/Students.csv'
try:
  df = pd.read_csv(DATA_PATH)
except FileNotFoundError as exc:
  # Stop here with a clear message: merely printing and continuing would leave
  # `df` undefined and the next statement would fail with a confusing NameError.
  raise FileNotFoundError(
      f"Error: '{DATA_PATH}' not found. Please make sure the file exists "
      "or provide the correct path."
  ) from exc

print("Initial Dataframe")
print(df)

Initial Dataframe
   StudentID     Name  Gender  Age  MathScore  EnglishScore  TotalScore
0          1    Alice  Female   15       85.0          90.0         170
1          2      Bob    Male   16        NaN          95.0          95
2          3  Charlie    Male   15       80.0           NaN          80
3          4    Diana  Female   16       90.0          85.0         170
4          5      Eve  Female   15       75.0          70.0         140


In [None]:
# Fill missing values with the mean of each column.
# The filled column is assigned back to df rather than calling
# fillna(inplace=True) on df[col]: that chained form operates on an
# intermediate object, emits a FutureWarning, and will no longer update df
# in pandas 3.0.
for score_col in ('MathScore', 'EnglishScore'):
  df[score_col] = df[score_col].fillna(df[score_col].mean())

print("\nDataframe after filling missing values")
print(df)


Dataframe after filling missing values
   StudentID     Name  Gender  Age  MathScore  EnglishScore  TotalScore
0          1    Alice  Female   15       85.0          90.0         170
1          2      Bob    Male   16       82.5          95.0          95
2          3  Charlie    Male   15       80.0          85.0          80
3          4    Diana  Female   16       90.0          85.0         170
4          5      Eve  Female   15       75.0          70.0         140


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['MathScore'].fillna(df['MathScore'].mean(), inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df['EnglishScore'].fillna(df['EnglishScore'].mean(), inplace=True)


In [None]:
# Replace the Gender labels with integer codes.
# The fitted encoder is kept in `le` so the label <-> code mapping stays available.
le = LabelEncoder()
df['Gender'] = le.fit_transform(df['Gender'])

# Heading and frame are emitted with a single print call.
print("\nDataframe after encoding gender", df, sep="\n")


Dataframe after encoding gender
   StudentID     Name  Gender  Age  MathScore  EnglishScore  TotalScore
0          1    Alice       0   15       85.0          90.0         170
1          2      Bob       1   16       82.5          95.0          95
2          3  Charlie       1   15       80.0          85.0          80
3          4    Diana       0   16       90.0          85.0         170
4          5      Eve       0   15       75.0          70.0         140


In [None]:
# Overwrite TotalScore with the element-wise sum of the two subject scores.
df['TotalScore'] = df['MathScore'].add(df['EnglishScore'])

# Heading and frame are emitted with a single print call.
print("\nDataframe after correcting total score", df, sep="\n")


Dataframe after correcting total score
   StudentID     Name  Gender  Age  MathScore  EnglishScore  TotalScore
0          1    Alice       0   15       85.0          90.0       175.0
1          2      Bob       1   16       82.5          95.0       177.5
2          3  Charlie       1   15       80.0          85.0       165.0
3          4    Diana       0   16       90.0          85.0       175.0
4          5      Eve       0   15       75.0          70.0       145.0


In [None]:
# Standardize the score columns using IQR
def standardize_iqr(column):
    """Clip outliers with the 1.5 * IQR rule, then centre on the median and scale by the IQR.

    Parameters
    ----------
    column : pandas.Series
        Numeric values to scale; must provide ``quantile`` and support
        ``np.clip``.

    Returns
    -------
    pandas.Series
        ``(clipped - median(clipped)) / IQR`` where ``clipped`` is ``column``
        limited to ``[Q1 - 1.5 * IQR, Q3 + 1.5 * IQR]``. When the IQR is
        exactly zero (for example a constant column) a scale of 1 is used, so
        the result is 0.0 instead of the NaN produced by 0 / 0.
    """
    Q1 = column.quantile(0.25)
    Q3 = column.quantile(0.75)
    IQR = Q3 - Q1
    lower_bound = Q1 - 1.5 * IQR
    upper_bound = Q3 + 1.5 * IQR
    # Values outside the fences are pulled back to the nearest fence.
    column_no_outliers = np.clip(column, lower_bound, upper_bound)
    # A zero IQR would turn every value into NaN; fall back to a unit scale.
    scale = IQR if IQR != 0 else 1.0
    return (column_no_outliers - column_no_outliers.median()) / scale

# Scale each score column in turn, overwriting the existing values.
# Note: the columns are replaced in place, so running this cell again would
# scale the already-scaled values.
for score_col in ('MathScore', 'EnglishScore', 'TotalScore'):
    df[score_col] = standardize_iqr(df[score_col])

# Heading and frame are emitted with a single print call.
print("\nDataframe after standardizing the scores using IQR", df, sep="\n")


Dataframe after standardizing the scores using IQR
   StudentID     Name  Gender  Age  MathScore  EnglishScore  TotalScore
0          1    Alice       0   15        0.5           1.0        0.00
1          2      Bob       1   16        0.0           2.0        0.25
2          3  Charlie       1   15       -0.5           0.0       -1.00
3          4    Diana       0   16        1.5           0.0        0.00
4          5      Eve       0   15       -1.5          -1.5       -2.50
