# Student Performance DataSet For Final Lab

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
sns.set_theme()
# Use the fully-qualified option names: the bare 'max_columns' / 'max_rows'
# patterns match more than one option in newer pandas releases, which makes
# set_option raise OptionError instead of applying the setting.
pd.set_option('display.max_columns',23)
pd.set_option('display.max_rows',123)
import matplotlib.gridspec as gridspec
import warnings
warnings.filterwarnings("ignore")

**Reading Dataset**

In [None]:
# Path of the source CSV as mounted in the Kaggle input directory.
csv_path = '/kaggle/input/students-performance-in-exams/StudentsPerformance.csv'
df = pd.read_csv(csv_path)

In [None]:
# Preview the first rows to confirm the file loaded with the expected columns.
df.head()

**Information about dataset**

In [None]:
# Print the structural summary of the frame produced by DataFrame.info.
df.info()

In [None]:
# Show the summary statistics produced by DataFrame.describe.
df.describe()

**Creating Result Columns from the Marks Obtained in Each Subject**

In [None]:
# Minimum score (inclusive) needed to pass a single subject.
passing_threshold = 35

# Each subject's Pass/Fail flag is derived from that subject's OWN score column
# (previously all three flags were computed from 'math score').
df['Math_Result'] = np.where(df['math score'] >= passing_threshold,'Pass','Fail')
df['reading_Result'] = np.where(df['reading score'] >= passing_threshold,'Pass','Fail')
df['writing_Result'] = np.where(df['writing score'] >= passing_threshold,'Pass','Fail')

# Overall result: 'Passed' only when every subject flag is 'Pass'.
# Computed column-wise instead of with a per-row apply.
subject_flags = df[['Math_Result','reading_Result','writing_Result']]
df['Result'] = np.where(subject_flags.eq('Pass').all(axis=1),'Passed','Failed')

**Visualizing Student Marks using Seaborn KDEPlot**

In [None]:
# Overlaid density curves of the three subject scores on a single axes.
fig, ax = plt.subplots(figsize=(10,7))
fig.suptitle('Density Plot of Marks',fontsize=18)

# `fill=True` is the current spelling of the deprecated `shade=True`.
# `palette` is omitted because no `hue` is used here, so each curve takes the
# next colour of the default cycle.
for score_col in ('math score','reading score','writing score'):
    sns.kdeplot(x=df[score_col],fill=True,alpha=.3,label=score_col,linewidth=0,ax=ax)

ax.legend()
ax.set_xlabel('Marks');

**Plotting Categorical Features**

In [None]:
# One count plot per Pass/Fail column, placed side by side in a 1x4 grid.
result_cols = ['Math_Result','reading_Result','writing_Result','Result']

fig = plt.figure(figsize=(15,5))
grid = gridspec.GridSpec(1,4)
grid.update(wspace=0.4)

for position,result_col in enumerate(result_cols):
    ax = plt.subplot(grid[0,position])
    sns.countplot(data=df,x=result_col,ax=ax)

**Plotting Categorical Features with Result**

In [None]:
# Count plot of each categorical feature, split by the overall Result.

# Column names grouped by dtype; `cats` and `nums` are read again by later cells.
cats = [name for name,dtype in df.dtypes.items() if dtype == 'object']
nums = [name for name,dtype in df.dtypes.items() if dtype == 'int64']

pltcats = cats[:4]
for cat in pltcats:
    rt = sns.countplot(data=df,x=cat,hue='Result')

    # Tilt the x tick labels when the string form of the first one is long.
    first_label = str(rt.get_xticklabels()[0])
    if len(first_label) > 25:
        rt.set_xticklabels(rt.get_xticklabels(),rotation = 30)
    plt.show()

**Calculating and adding new features Total_marks and Percentage**

In [None]:
# Total_marks is the sum of the three subject scores; Percentage is that total divided by 3.
df['Total_marks'] = df['math score'].add(df['reading score']).add(df['writing score'])
df['Percentage'] = df['Total_marks'].div(3)


**specifying grades**

* 90-100  A+
* 83-89   A
* 75-82   B+
* 67-74   B
* 59-66   C
* 51-58   C+
* 43-50   D
* 35-42   D+
* 0-34    F


**Mapping these grades into a new feature, Grading**

In [None]:
def Grading(per,result):
    """Return the grade label for a percentage and an overall result.

    Parameters
    ----------
    per : number
        Percentage obtained by the student.
    result : str
        Overall result; the value 'Failed' always maps to grade 'F'.

    Returns
    -------
    str
        'F' when ``result`` is 'Failed' or ``per`` is below the lowest band,
        otherwise the label of the highest band whose lower bound is <= ``per``.

    Notes
    -----
    Lower bounds are inclusive, so a mark sitting exactly on a boundary
    (35, 43, ..., 90) belongs to the band that starts there. The previous
    strict comparisons returned ``None`` for a passing row at exactly 35
    and pushed boundary marks one band down.
    """
    if result=='Failed':
        return 'F'

    # (inclusive lower bound, label), ordered from the highest band down.
    grade_bands = (
        (90,'A1'),
        (83,'A2'),
        (75,'B1'),
        (67,'B2'),
        (59,'C1'),
        (51,'C2'),
        (43,'D1'),
        (35,'D2'),
    )
    for lower_bound,grade in grade_bands:
        if per >= lower_bound:
            return grade

    # Below every band: report a fail grade instead of falling through to None.
    return 'F'
    
# Grade every row from its Percentage and overall Result.
df['Grading'] = df.apply(
    lambda record: Grading(record['Percentage'],record['Result']),
    axis=1,
)

**1. Styling DataFrame on the basis of Results (last cell in a row)**

In [None]:
def colorrow(x):
    """Build per-cell CSS for one row, highlighting only the row's last cell.

    Parameters
    ----------
    x : row object passed by ``Styler.apply(..., axis=1)``
        Must support ``x['Result']`` and ``x.shape[0]``.

    Returns
    -------
    list of str
        One CSS string per cell. Every cell but the last gets an empty string;
        the last cell gets the colour mapped to ``x['Result']`` ('Failed' or
        'Passed'), or an empty string for any other value so that a list is
        always returned rather than ``None``.
    """
    result_css = {
        'Failed': 'background-color: #FF4A35',
        'Passed': 'background-color: #A6FF57',
    }
    length = x.shape[0]
    last_cell_css = result_css.get(x['Result'],'')
    return [''] * (length-1) + [last_cell_css]

# Render the first 20 rows with `colorrow` applied row by row (axis=1).
df.head(20).style.apply(colorrow,axis=1)

**2. Styling DataFrame on the basis of Results (all cells in a row)**

In [None]:
def colorrow(x):
    """Build per-cell CSS for one row, colouring every cell by ``x['Result']``.

    Redefines the earlier ``colorrow`` in this notebook, so the name refers to
    this whole-row version from here on.

    Parameters
    ----------
    x : row object passed by ``Styler.apply(..., axis=1)``
        Must support ``x['Result']`` and ``x.shape[0]``.

    Returns
    -------
    list of str
        One CSS string per cell, all identical: the colour mapped to
        ``x['Result']`` ('Failed' or 'Passed'), or an empty string for any
        other value so that a list is always returned rather than ``None``.
    """
    result_css = {
        'Failed': 'background-color: #FF4A35',
        'Passed': 'background-color: #A6FF57',
    }
    length = x.shape[0]
    return [result_css.get(x['Result'],'')] * length

# Render the first 20 rows with the whole-row `colorrow` applied row by row (axis=1).
df.head(20).style.apply(colorrow,axis=1)

**Swarmplot of Grades with Numerical Features**

In [None]:
# Swarm plot of each column listed in `nums` against the assigned grade,
# one figure per column.

for score_col in nums:
    fig, ax = plt.subplots(figsize=(16,8))
    sns.swarmplot(data=df,x='Grading',y=score_col,ax=ax)
    plt.show()

**Conditional Plot using seaborn FacetGrid**

In [None]:
# Grid of panels: one row per value of 'Grading', one column per value of 'gender'.
g = sns.FacetGrid(data=df,row='Grading',col='gender')
# Draw a histogram of 'math score' in each panel.
g.map(sns.histplot, 'math score')

**Strip Plot of Categorical Features with Continuous Features**

In [None]:
# Grid shape: one row per categorical feature, one column per numeric feature.
# `useful_cats`, `row` and `col` are read again by the box- and violin-plot cells.
useful_cats = cats[:5]
row, col = len(useful_cats), len(nums)

# This is my second EDA dataset; the snippet below was written after going
# through the seaborn and matplotlib.pyplot APIs.
fig, ax = plt.subplots(row,col,figsize=(32,28))
fig.subplots_adjust(wspace=0.2,hspace=0.4)
fig.suptitle('Strip Plot of Categorical Features with Continuous Features',y=0.9,fontsize=20)

for roww,cat in enumerate(useful_cats):
    for coll,num in enumerate(nums):
        rt = sns.stripplot(data = df,y=num,x=cat,ax=ax[roww,coll])

        # Tilt the x tick labels when the string form of the first one is long.
        first_label = str(rt.get_xticklabels()[0])
        if len(first_label) > 25:
            rt.set_xticklabels(rt.get_xticklabels(),rotation = 30)

**Box Plot of Categorical Features with Continuous Features**

In [None]:
# Size the grid from the feature lists themselves rather than from the
# notebook-level `row`/`col` variables, which can be reassigned by other cells
# between runs and would then give this figure the wrong shape.
n_rows, n_cols = len(useful_cats), len(nums)

# squeeze=False keeps `ax` two-dimensional even for a single row or column,
# so ax[roww][coll] is always valid.
fig, ax = plt.subplots(n_rows,n_cols,figsize=(30,32),squeeze=False)

fig.subplots_adjust(wspace=0.2,hspace=0.4)
fig.suptitle('Box Plot of Categorical Features with Continuous Features',y=0.9,fontsize=20)
for roww,cat in enumerate(useful_cats):
    for coll,num in enumerate(nums):
        rt = sns.boxplot(data = df,y=num,x=cat,ax=ax[roww][coll])

        # Tilt the x tick labels when the string form of the first one is long.
        text_ln = len(str(rt.get_xticklabels()[0]))
        if text_ln > 25:
            rt.set_xticklabels(rt.get_xticklabels(),rotation = 30)

**Violin Plot of Categorical Features with Continuous Features**

In [None]:
# Size the grid from the feature lists themselves rather than from the
# notebook-level `row`/`col` variables, which can be reassigned by other cells
# between runs and would then give this figure the wrong shape.
n_rows, n_cols = len(useful_cats), len(nums)

# squeeze=False keeps `ax` two-dimensional even for a single row or column,
# so ax[roww][coll] is always valid.
fig, ax = plt.subplots(n_rows,n_cols,figsize=(32,32),squeeze=False)

fig.subplots_adjust(wspace=0.2,hspace=0.4)
fig.suptitle('Violin Plot of Categorical Features with Continuous Features',y=0.9,fontsize=20)
for roww,cat in enumerate(useful_cats):
    for coll,num in enumerate(nums):
        rt = sns.violinplot(data = df,y=num,x=cat,ax=ax[roww][coll])

        # Tilt the x tick labels when the string form of the first one is long.
        text_ln = len(str(rt.get_xticklabels()[0]))
        if text_ln > 25:
            rt.set_xticklabels(rt.get_xticklabels(),rotation = 30)

**Categorical Count Plot**

In [None]:
# Count plot of each feature in `useful_cats`, laid out on a 2x3 grid.
fig, ax = plt.subplots(2,3,figsize=(18,8))

fig.subplots_adjust(wspace=0.3,hspace=0.4)
fig.suptitle('Categorical Count Plot',y=0.93,fontsize=20)

# Walk the grid in row-major order through `ax.flat`. This replaces the manual
# row/column arithmetic and, importantly, no longer overwrites the
# notebook-level `row` and `col` variables that the grid-plot cells read.
panels = list(ax.flat)
for cat,panel in zip(useful_cats,panels):
    rt = sns.countplot(x=df[cat],ax=panel)

    # Tilt the x tick labels when the string form of the first one is long.
    text_ln = len(str(rt.get_xticklabels()[0]))
    if text_ln > 25:
        rt.set_xticklabels(rt.get_xticklabels(),rotation = 30)

# Hide any panel left without a feature so the figure shows no empty axes.
for unused_panel in panels[len(useful_cats):]:
    unused_panel.set_visible(False)


**Correlation of marks Features**

In [None]:
# Correlate numeric columns only: the frame also holds string columns, which
# DataFrame.corr rejects in recent pandas releases. Selecting by dtype first
# works the same way on older and newer versions.
heat = df.select_dtypes(include='number').corr()

**HeatMap Plot of Dataset df**

In [None]:

# Annotated heatmap of the correlation matrix stored in `heat`.
sns.heatmap(data=heat,annot=True)