In [None]:


import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error, r2_score

# Read the grade sheet from disk.
data = pd.read_csv(r"/content/test_2.csv")

# Preview the leading rows so the column layout is visible.
print(data.head())

# 'Seat No.' is a per-row identifier, not a predictor, so it is discarded
# before any modelling.
data = data.drop(columns=['Seat No.'])

# 'CGPA' is the regression target (y); every remaining column is a feature (X).
y = data['CGPA']
X = data.drop(columns=['CGPA'])

# *** Identify and handle non-numerical columns ***
# StandardScaler and LinearRegression need numeric input, so every
# non-numerical feature column is expanded into indicator (one-hot) columns.

# Select by exclusion rather than by include=['object'] so that text columns
# are found whether pandas stores them as object, category or a dedicated
# string dtype. Numeric and boolean columns are left untouched.
object_columns = X.select_dtypes(exclude=['number', 'bool']).columns

# Encode all selected columns in a single call: calling get_dummies once per
# column rebuilds the whole frame on every iteration. The default prefix is
# the source column name, so the resulting column names and order match the
# per-column approach. drop_first=True drops one level per column so the
# indicators of a column are not perfectly collinear.
# The guard leaves X unchanged when there is nothing to encode.
if len(object_columns) > 0:
    X = pd.get_dummies(X, columns=list(object_columns), drop_first=True)

# Hold out 20% of the rows for evaluation; the fixed random_state keeps the
# split reproducible between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# Learn the scaling statistics from the training rows only, then apply the
# same transformation to both partitions.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Fit a linear regression model on the scaled training features.
model = LinearRegression().fit(X_train, y_train)

# Predict the target for the held-out rows.
y_pred = model.predict(X_test)

# Compare the predictions with the true held-out targets.
r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)

# Report both metrics.
print(f"Mean Squared Error: {mse}")
print(f"R-squared Score: {r2}")

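# Predict CGPA for new data
# NOTE: the model was trained on one-hot encoded, standardized features, so
# new rows must first be encoded the same way and aligned to the training
# feature columns (same columns, same order) before being passed to the scaler.
# new_data = [[...]]  # Add new data in the format of your feature columns
# new_pred = model.predict(scaler.transform(new_data))
# print(f"Predicted CGPA: {new_pred}")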

   Seat No. PH-121 HS-101 CY-105 HS-105/12 MT-111 CS-105 CS-106 EL-102 EE-119  \
0  CS-97001     B-     D+     C-         C     C-     D+      D     C-     B-   
1  CS-97002      A      D     D+         D     B-      C      D      A     D+   
2  CS-97003      A      B      A        B-     B+      A     B-     B+     A-   
3  CS-97004      D     C+     D+         D      D     A-     D+     C-      D   
4  CS-97005     A-     A-     A-        B+      A      A     A-     B+      A   

   ... CS-312 CS-317 CS-403 CS-421 CS-406 CS-414 CS-419 CS-423 CS-412   CGPA  
0  ...     C-     C-     C-     C-     A-      A     C-      B     A-  2.205  
1  ...     D+      D      C      D     A-     B-      C      C      B  2.008  
2  ...      B      B      A      C      A      A      A     A-      A  3.608  
3  ...     D+      C     D+     C-     B-      B     C+     C+     C+  1.906  
4  ...     B-     B+     B+     B-     A-      A     A-     A-      A  3.448  

[5 rows x 43 columns]
Mean Squared Err