In [None]:
# Step 1 : import library
import pandas as pd

In [None]:
# Step 2 : import data
# Source: the "Admission Chance" CSV in the ybifoundation/Dataset GitHub repository.
DATA_URL = 'https://github.com/ybifoundation/Dataset/raw/main/Admission%20Chance.csv'
admission = pd.read_csv(DATA_URL)

# Preview the first rows. A bare `admission.head()` in the middle of a cell is
# evaluated and thrown away (only a cell's last expression is echoed), so the
# preview has to be printed explicitly to be seen.
print(admission.head())

# Column dtypes and non-null counts (info() writes its report to stdout itself).
admission.info()

# Check the column names. Some headers carry stray whitespace
# (' SOP', 'LOR ', 'Chance of Admit '); later cells must match them exactly.
print(admission.columns)

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 400 entries, 0 to 399
Data columns (total 9 columns):
 #   Column             Non-Null Count  Dtype  
---  ------             --------------  -----  
 0   Serial No          400 non-null    int64  
 1   GRE Score          400 non-null    int64  
 2   TOEFL Score        400 non-null    int64  
 3   University Rating  400 non-null    int64  
 4    SOP               400 non-null    float64
 5   LOR                400 non-null    float64
 6   CGPA               400 non-null    float64
 7   Research           400 non-null    int64  
 8   Chance of Admit    400 non-null    float64
dtypes: float64(4), int64(5)
memory usage: 28.2 KB
Index(['Serial No', 'GRE Score', 'TOEFL Score', 'University Rating', ' SOP',
       'LOR ', 'CGPA', 'Research', 'Chance of Admit '],
      dtype='object')


In [None]:
# Step 3 : define target (y) and features (X)

# Column labels are spelled exactly as they appear in the CSV header, including
# the stray leading/trailing spaces in ' SOP', 'LOR ' and 'Chance of Admit '.
target_columns = ['Chance of Admit ']
# 'Serial No' is left out of the features -- presumably it is only a row
# identifier with no predictive meaning (TODO confirm against the data source).
feature_columns = ['GRE Score', 'TOEFL Score', 'University Rating', ' SOP', 'LOR ', 'CGPA', 'Research']

# Selecting with a list keeps y as a one-column DataFrame (2-D) rather than a
# Series, which is why the fitted coefficients and predictions are 2-D as well.
y = admission[target_columns]
X = admission[feature_columns]

In [None]:
# Step 4 : train test split
from sklearn.model_selection import train_test_split

# 70 % of the rows are used for training; the fixed seed makes the shuffle repeatable.
split_options = {'train_size': 0.7, 'random_state': 2529}
X_train, X_test, y_train, y_test = train_test_split(X, y, **split_options)

# check shape of train and test sample
tuple(subset.shape for subset in (X_train, X_test, y_train, y_test))

((280, 7), (120, 7), (280, 1), (120, 1))

In [None]:
# Step 5 : select model
# Linear regression estimator, constructed with no arguments so every option
# keeps its scikit-learn default. Nothing is fitted yet at this point.
from sklearn.linear_model import LinearRegression
model = LinearRegression()

In [None]:
# Step 6 : train or fit model
# Fit on the training split only; the test split is kept aside for Step 7.
model.fit(X_train, y_train)

# Show both learned parameters. A bare `model.intercept_` on its own line in the
# middle of a cell is evaluated and discarded, so the intercept and the
# coefficients are returned together as the cell's final expression.
# The coefficients follow the column order of X_train.
model.intercept_, model.coef_

array([[ 0.00204057,  0.00287273,  0.00566887, -0.00380559,  0.01973175,
         0.11314449,  0.02061553]])

In [None]:
# Step 7 : predict model
# predict() returns a NumPy array of predicted admission chances with one row
# per row of X_test (2-D here, because y was kept as a one-column DataFrame).
y_pred = model.predict(X_test)

# Preview only the first few predictions instead of dumping the whole array;
# the next cell puts every prediction next to its actual value.
print(y_pred[:5])

[[0.71426327]
 [0.72534136]
 [0.69677103]
 [0.66566584]
 [0.57483872]
 [0.93087527]
 [0.93701113]
 [0.72361387]
 [0.81130158]
 [0.62223963]
 [0.59629648]
 [0.80084072]
 [0.52537944]
 [0.79174558]
 [0.84064992]
 [0.66429594]
 [0.65136589]
 [0.66990687]
 [0.75794085]
 [0.86072023]
 [0.66088101]
 [0.85570763]
 [0.84777425]
 [0.95033179]
 [0.68750762]
 [0.65907671]
 [0.65279623]
 [0.5709259 ]
 [0.55895645]
 [0.57990205]
 [0.54497918]
 [0.7570717 ]
 [0.69682571]
 [0.77286067]
 [0.64320811]
 [0.5183554 ]
 [0.43816818]
 [0.84654064]
 [0.90398354]
 [0.80517781]
 [0.72218971]
 [0.72882587]
 [0.68145136]
 [0.88592237]
 [0.77208852]
 [0.78778085]
 [0.95526121]
 [0.88586486]
 [0.59980416]
 [0.50690214]
 [0.59947098]
 [0.63380406]
 [0.82841217]
 [0.44911724]
 [0.71068577]
 [0.77335748]
 [0.68851557]
 [0.64486026]
 [0.85537724]
 [0.65517768]
 [0.65046031]
 [0.90818978]
 [0.63422429]
 [0.68658606]
 [0.72150268]
 [0.69030545]
 [0.59381287]
 [0.93813035]
 [0.58997351]
 [0.91542587]
 [0.59283415]
 [0.93

In [None]:
# Wrap the raw prediction array in a labelled one-column DataFrame
y_pred_df = pd.DataFrame(y_pred, columns=['Predicted Chance of Admit'])

# y_test keeps the row labels it had before the split; renumber it from 0 so it
# lines up row-for-row with y_pred_df, then place the two frames side by side
actual_df = y_test.reset_index(drop=True)
result_df = pd.concat([actual_df, y_pred_df], axis='columns')

# Print the actual-vs-predicted comparison
print(result_df)

     Chance of Admit   Predicted Chance of Admit
0                0.63                   0.714263
1                0.74                   0.725341
2                0.68                   0.696771
3                0.71                   0.665666
4                0.68                   0.574839
..                ...                        ...
115              0.76                   0.763017
116              0.71                   0.710803
117              0.64                   0.626133
118              0.82                   0.839512
119              0.72                   0.685783

[120 rows x 2 columns]


In [None]:
# Step 8 : model accuracy
from sklearn.metrics import mean_absolute_error, mean_squared_error

# Both error metrics compare the held-out targets with the predictions from Step 7.
mae, mse = (metric(y_test, y_pred) for metric in (mean_absolute_error, mean_squared_error))

# Report each metric on its own line, label first.
for label, value in (("Mean Absolute Error:", mae), ("Mean Squared Error:", mse)):
    print(label, value)

Mean Absolute Error: 0.04400128934232651
Mean Squared Error: 0.004038263715495693
