In [None]:
import sqlite3
import pandas as pd
import warnings
warnings.filterwarnings('ignore')

In [None]:
# Load the student/institution mapping table from SQLite into a DataFrame.
db_path = "mapped_data.db"  # Replace with your database file path

# Query to fetch data
query = "SELECT * from student_institution_mappings;"  # Replace 'student_institution_mappings' with your table name

# Connect, read, and always release the connection: the `finally` clause runs
# even when the query raises (e.g. missing table), so the handle is not leaked
# into the long-lived kernel.
conn = sqlite3.connect(db_path)
try:
    df = pd.read_sql_query(query, conn)
finally:
    conn.close()

# Display the first few rows of the DataFrame
print("Data from the SQLite table:")
print(df.head())  # Use print() instead of display() for standard Python environments



Data from the SQLite table:
   id  ... institution_rank
0   1  ...                1
1   2  ...                1
2   3  ...                1
3   4  ...                1
4   5  ...                1

[5 rows x 12 columns]


In [None]:
# Show the first twelve rows (positions 0 through 11) as plain text.
print(df.iloc[:12])

    id  ... institution_rank
0    1  ...                1
1    2  ...                1
2    3  ...                1
3    4  ...                1
4    5  ...                1
5    6  ...                1
6    7  ...                2
7    8  ...                2
8    9  ...                2
9   10  ...                2
10  11  ...                2
11  12  ...                2

[12 rows x 12 columns]


In [None]:
# (rows, columns) of the loaded table.
df.shape

(600, 12)


In [None]:
# Per-column non-null counts and dtypes, plus memory usage.
df.info()


<class 'pandas.core.frame.DataFrame'>
RangeIndex: 600 entries, 0 to 599
Data columns (total 12 columns):
 #   Column                 Non-Null Count  Dtype  
---  ------                 --------------  -----  
 0   id                     600 non-null    int64  
 1   student_name           600 non-null    object 
 2   cgpa                   600 non-null    float64
 3   project_score          600 non-null    float64
 4   internships            600 non-null    int64  
 5   extracurricular_score  600 non-null    float64
 6   total_score            600 non-null    float64
 7   department             600 non-null    object 
 8   field                  600 non-null    object 
 9   outcome                600 non-null    object 
 10  institution_name       600 non-null    object 
 11  institution_rank       600 non-null    int64  
dtypes: float64(4), int64(3), object(5)
memory usage: 56.4+ KB


In [None]:
# Count missing values in each column.
df.isna().sum()


id                       0
student_name             0
cgpa                     0
project_score            0
internships              0
extracurricular_score    0
total_score              0
department               0
field                    0
outcome                  0
institution_name         0
institution_rank         0
dtype: int64


In [None]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric columns.
df.describe()


               id        cgpa  ...  total_score  institution_rank
count  600.000000  600.000000  ...   600.000000        600.000000
mean   300.500000    7.959567  ...   258.600100         50.490000
std    173.349358    1.153797  ...    32.769352         28.885299
min      1.000000    6.000000  ...   178.200000          1.000000
25%    150.750000    6.957500  ...   233.390000         25.750000
50%    300.500000    7.930000  ...   257.660000         50.500000
75%    450.250000    9.012500  ...   284.295000         75.250000
max    600.000000   10.000000  ...   337.200000        100.000000

[8 rows x 7 columns]


In [None]:
# List the column names of the loaded table.
df.columns

Index(['id', 'student_name', 'cgpa', 'project_score', 'internships',
       'extracurricular_score', 'total_score', 'department', 'field',
       'outcome', 'institution_name', 'institution_rank'],
      dtype='object')


In [None]:
# Distinct labels of the target column.
df['outcome'].unique()

['Selected' 'No Offer' 'Rejected']


In [None]:
# Number of rows per outcome label, to check how balanced the classes are.
df['outcome'].value_counts()

outcome
Selected    265
Rejected    249
No Offer     86
Name: count, dtype: int64


In [None]:
# Remove the columns that are not used as model inputs.
# errors='ignore' makes this cell safe to re-run: once the columns are gone,
# a second execution is a no-op instead of raising KeyError.
columns_to_drop = ['id', 'student_name', 'institution_name', 'institution_rank']
df = df.drop(columns=columns_to_drop, errors='ignore')

     cgpa  project_score  internships  ...        department   field   outcome
0    9.99           99.9            3  ...  Computer Science      IT  Selected
1    9.39           99.9            2  ...        Mechanical  Non-IT  Selected
2    8.59           97.0            2  ...  Computer Science      IT  No Offer
3    9.85           99.4            2  ...        Mechanical  Non-IT  Selected
4    8.66           98.7            3  ...             Civil  Non-IT  No Offer
..    ...            ...          ...  ...               ...     ...       ...
595  6.67           53.7            3  ...  Computer Science      IT  Selected
596  8.82           50.2            1  ...        Mechanical  Non-IT  Rejected
597  6.50           50.2            2  ...       Electronics      IT  Selected
598  7.23           52.7            0  ...        Electrical  Non-IT  Rejected
599  6.30           54.4            0  ...        Electrical  Non-IT  Rejected

[600 rows x 8 columns]


In [None]:
# Distinct department labels; these are the keys the encoding below must cover.
df['department'].unique()

['Civil' 'Electrical' 'Computer Science' 'Mechanical' 'Electronics']


In [None]:
# Encode the department labels as integer codes.
department_codes = {'Computer Science':0, 'Mechanical':2, 'Civil':3, 'Electrical':4, 'Electronics':1}

# Only encode when the column is not already made of codes: re-running a plain
# .map() on an encoded column would turn every value into NaN.
if not df['department'].isin(list(department_codes.values())).all():
    encoded_department = df['department'].map(department_codes)
    # .map() yields NaN for labels missing from the dict; fail loudly instead
    # of passing NaN on to the models.
    unmapped_departments = df.loc[encoded_department.isna(), 'department'].unique()
    if len(unmapped_departments):
        raise ValueError(f"Unmapped department labels: {list(unmapped_departments)}")
    df['department'] = encoded_department.astype('int64')

0      0
1      2
2      0
3      2
4      3
      ..
595    0
596    2
597    1
598    4
599    4
Name: department, Length: 600, dtype: int64


In [None]:
# Distinct field labels; these are the keys the encoding below must cover.
df['field'].unique()

['Non-IT' 'IT']


In [None]:
# Encode the field labels as integer codes.
field_codes = {'IT':0, 'Non-IT':1}

# Only encode when the column is not already made of codes: re-running a plain
# .map() on an encoded column would turn every value into NaN.
if not df['field'].isin(list(field_codes.values())).all():
    encoded_field = df['field'].map(field_codes)
    # .map() yields NaN for labels missing from the dict; fail loudly instead
    # of passing NaN on to the models.
    unmapped_fields = df.loc[encoded_field.isna(), 'field'].unique()
    if len(unmapped_fields):
        raise ValueError(f"Unmapped field labels: {list(unmapped_fields)}")
    df['field'] = encoded_field.astype('int64')

0      0
1      1
2      0
3      1
4      1
      ..
595    0
596    1
597    0
598    1
599    1
Name: field, Length: 600, dtype: int64


In [None]:
# Distinct outcome labels; these are the keys the encoding below must cover.
df['outcome'].unique()

['Selected' 'No Offer' 'Rejected']


In [None]:
# Encode the target labels as integer class ids.
outcome_codes = {'Selected':0, 'No Offer':2, 'Rejected':1}

# Only encode when the column is not already made of codes: re-running a plain
# .map() on an encoded column would turn every value into NaN.
if not df['outcome'].isin(list(outcome_codes.values())).all():
    encoded_outcome = df['outcome'].map(outcome_codes)
    # .map() yields NaN for labels missing from the dict; fail loudly instead
    # of passing NaN on to the models.
    unmapped_outcomes = df.loc[encoded_outcome.isna(), 'outcome'].unique()
    if len(unmapped_outcomes):
        raise ValueError(f"Unmapped outcome labels: {list(unmapped_outcomes)}")
    df['outcome'] = encoded_outcome.astype('int64')

0      0
1      0
2      2
3      0
4      2
      ..
595    0
596    1
597    0
598    1
599    1
Name: outcome, Length: 600, dtype: int64


In [None]:
# Columns remaining after dropping the unused ones and encoding the categoricals.
df.columns

Index(['cgpa', 'project_score', 'internships', 'extracurricular_score',
       'total_score', 'department', 'field', 'outcome'],
      dtype='object')


In [None]:
# Target vector: the encoded outcome column.
y = df['outcome']
# Feature matrix: every remaining column.
X = df.drop(columns='outcome')

In [None]:
# Display the feature matrix.
X

      cgpa  project_score  internships  ...  total_score  department  field
0    10.00           98.4            1  ...       337.20           3      1
1     9.81           98.6            0  ...       334.94           4      1
2     9.26           97.4            1  ...       333.04           0      0
3     8.15           99.1            2  ...       327.10           3      1
4     6.44           99.9            1  ...       325.56           4      1
..     ...            ...          ...  ...          ...         ...    ...
595   7.13           53.6            1  ...       189.72           2      1
596   6.91           50.5            0  ...       188.54           2      1
597   6.73           50.1            1  ...       188.02           2      1
598   7.26           51.2            2  ...       186.64           0      0
599   6.40           50.5            1  ...       178.20           0      0

[600 rows x 7 columns]


In [None]:
from sklearn.model_selection import train_test_split

In [None]:
# Hold out 20% of the rows for testing; the fixed seed keeps the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

In [None]:
# Peek at the first five feature rows.
X.head(5)

    cgpa  project_score  internships  ...  total_score  department  field
0  10.00           98.4            1  ...       337.20           3      1
1   9.81           98.6            0  ...       334.94           4      1
2   9.26           97.4            1  ...       333.04           0      0
3   8.15           99.1            2  ...       327.10           3      1
4   6.44           99.9            1  ...       325.56           4      1

[5 rows x 7 columns]


In [None]:
from sklearn.linear_model import LogisticRegression
from sklearn.neighbors import KNeighborsClassifier
from sklearn import svm 
from sklearn.tree import DecisionTreeClassifier
from sklearn.ensemble import RandomForestClassifier
from sklearn.ensemble import GradientBoostingClassifier

In [None]:
# Fit six baseline classifiers on the training split.

# SVC is imported by name so this cell no longer needs `svm` to still be the
# sklearn module: the original `svm = svm.SVC()` overwrote the module with the
# estimator, so running the cell a second time raised AttributeError.
from sklearn.svm import SVC

lr = LogisticRegression()
lr.fit(X_train, y_train)

# The fitted estimator keeps the name `svm` because the prediction cell
# calls svm.predict(X_test).
svm = SVC()
svm.fit(X_train, y_train)

knn = KNeighborsClassifier()
knn.fit(X_train, y_train)

# The tree-based models are randomized; seeding them (same seed as the
# train/test split) makes the reported accuracies reproducible across runs.
dt = DecisionTreeClassifier(random_state=42)
dt.fit(X_train, y_train)

rf = RandomForestClassifier(random_state=42)
rf.fit(X_train, y_train)

gb = GradientBoostingClassifier(random_state=42)
gb.fit(X_train, y_train)

In [None]:
# Predict the held-out rows with each fitted model, in the order
# LR, SVC, KNN, DT, RF, GB.
fitted_models = (lr, svm, knn, dt, rf, gb)
y_pred1, y_pred2, y_pred3, y_pred4, y_pred5, y_pred6 = [
    model.predict(X_test) for model in fitted_models
]

In [None]:
from sklearn.metrics import accuracy_score

In [None]:
# Accuracy of each model's predictions against the held-out labels.
test_predictions = (y_pred1, y_pred2, y_pred3, y_pred4, y_pred5, y_pred6)
score1, score2, score3, score4, score5, score6 = [
    accuracy_score(y_test, predicted) for predicted in test_predictions
]

In [None]:
# Print the six accuracies on one line, separated by single spaces.
print(' '.join(str(score) for score in (score1, score2, score3, score4, score5, score6)))

0.4583333333333333 0.525 0.49166666666666664 0.45 0.4666666666666667 0.4583333333333333


In [None]:
# Tabulate each model's test accuracy as a percentage.
model_labels = ['LR', 'SVC', 'KNN', 'DT', 'RF', 'GB']
accuracy_percent = [
    score * 100
    for score in (score1, score2, score3, score4, score5, score6)
]
final_data = pd.DataFrame({'Models': model_labels, 'ACC': accuracy_percent})

In [None]:
# Display the accuracy table (ACC is in percent).
final_data

  Models        ACC
0     LR  45.833333
1    SVC  52.500000
2    KNN  49.166667
3     DT  45.000000
4     RF  46.666667
5     GB  45.833333


In [None]:
import seaborn as sns
import matplotlib.pyplot as plt

In [None]:
# Bar chart of test accuracy per model. The explicit fig/ax pair lets the
# chart carry a title and axis labels so it can be read on its own.
fig, ax = plt.subplots()
sns.barplot(x='Models', y='ACC', data=final_data, ax=ax)
ax.set(title='Test accuracy by model', xlabel='Model', ylabel='Accuracy (%)')
plt.show()

Axes(0.125,0.11;0.775x0.77)


In [None]:
# Refit gradient boosting on the full dataset (train + test rows). This
# rebinds `gb`, replacing the model that was scored on the held-out split.
# The seed makes the fitted model, and therefore the saved artifact, reproducible.
gb = GradientBoostingClassifier(random_state=42)
gb.fit(X, y)

In [None]:
# A single applicant to score. Keys use the same names and order as the
# training feature columns; department 0 and field 0 are the codes assigned
# to 'Computer Science' and 'IT' by the encoding cells.
applicant = {
    'cgpa': 9.1,
    'project_score': 95,
    'internships': 1,
    'extracurricular_score': 82,
    'total_score': 312,
    'department': 0,
    'field': 0,
}
# The scalar values need an explicit index to form a one-row frame.
new_data = pd.DataFrame(applicant, index=[0])

In [None]:
p = gb.predict(new_data)
prob = gb.predict_proba(new_data)[0]
if p==0:
    print('Selected')
    print(f"You will be Selected with probability of {prob[0]:.2f}")
elif p==1:
    print('Rejected')
else:
    print('No offer')

Selected
You will be Selected with probability of 0.80


In [None]:
# Predicted class probabilities for the row in new_data (first row of predict_proba).
prob

[0.8019188  0.17277702 0.02530417]


In [None]:
import joblib

In [None]:
# Persist the fitted gradient-boosting model to disk.
joblib.dump(value=gb, filename='model_placement_prediction')

['model_placement_prediction']


In [None]:
# Reload the model that was just written by joblib.dump.
# NOTE(review): joblib files are pickle-based, so only load files from a trusted source.
model = joblib.load('model_placement_prediction')

In [None]:
# Sanity check: predict the same row with the reloaded model.
model.predict(new_data)

[0]
