# Students Performance in Exams Visualization

In [None]:
import os
# Print the full path of every file found anywhere below the Kaggle input directory.
for root, _subdirs, names in os.walk('/kaggle/input'):
    for name in names:
        full_path = os.path.join(root, name)
        print(full_path)

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Load the students-performance CSV into a DataFrame and preview its first rows.
df = pd.read_csv('/kaggle/input/students-performance-in-exams/StudentsPerformance.csv')
df.head()

In [None]:
# Summary of the frame: column names, non-null counts and dtypes.
df.info()

In [None]:
# Check for missing values: number of NaN entries in each column.
df.isna().sum()

In [None]:
# Descriptive statistics (count, mean, std, quartiles, min/max) of the numeric columns.
df.describe()

In [None]:
# Column labels available for the analysis below.
df.columns

### Gender

In [None]:
# Number of students per gender.
df['gender'].value_counts()

In [None]:
# Share of each gender as a pie chart.
gender_counts = df['gender'].value_counts()

plt.figure(figsize=(15,8))
# One explode offset per slice: the list is sized from the data because
# matplotlib raises a ValueError when its length differs from the slice count.
gender_counts.plot.pie(y='gender',autopct="%1.1f%%",startangle=90,
                       cmap='Set3_r',fontsize=13,
                       explode=[0.05]*len(gender_counts),shadow=True)
plt.ylabel("")  # hide the default y-axis label next to the pie
plt.title("Gender Distribution",fontsize=14,bbox={'facecolor':'blue','alpha':0.2})
plt.show()

### Race/Ethnicity

In [None]:
# Number of students per race/ethnicity group.
df['race/ethnicity'].value_counts()

In [None]:
# Bar chart of group sizes, bars ordered from the most to the least frequent group.
race_order = df['race/ethnicity'].value_counts().index

plt.figure(figsize=(12,8))
sns.countplot(
    x='race/ethnicity',
    data=df,
    order=race_order,
    palette='Pastel1_r',
)
plt.show()

In [None]:
# Share of each race/ethnicity group as a pie chart.
race_counts = df['race/ethnicity'].value_counts()

plt.figure(figsize=(15,8))
# The explode list must have exactly one entry per slice, so it is derived
# from the number of groups actually present instead of a hard-coded 5.
race_counts.plot.pie(y='race/ethnicity',autopct="%1.1f%%",startangle=90,
                     cmap='Set2_r',fontsize=13,
                     explode=[0.05]*len(race_counts),shadow=True)
plt.ylabel("")  # hide the default y-axis label next to the pie
plt.title("Race/Ethnicity Distribution",fontsize=14,bbox={'facecolor':'blue','alpha':0.2})
plt.show()

### Parental level of education

In [None]:
# Number of students per parental education level.
df['parental level of education'].value_counts()

In [None]:
# Bar chart of parental education levels, most frequent level first.
education_order = df['parental level of education'].value_counts().index

plt.figure(figsize=(12,8))
sns.countplot(
    x='parental level of education',
    data=df,
    order=education_order,
    palette='Pastel2_r',
)
plt.show()

### Lunch

In [None]:
# Number of students per lunch type.
df['lunch'].value_counts()

In [None]:
# Share of each lunch type as a pie chart.
lunch_counts = df['lunch'].value_counts()

plt.figure(figsize=(15,8))
# One explode offset per slice, sized from the data so the call cannot fail
# with a length mismatch if the number of lunch categories changes.
lunch_counts.plot.pie(y='lunch',autopct="%1.1f%%",
                      cmap='Set3',startangle=90,fontsize=13,
                      explode=[0.05]*len(lunch_counts),shadow=True)
plt.ylabel("")  # hide the default y-axis label next to the pie
plt.title("Standard VS. Free/Reduced Lunch Price",fontsize=14,bbox={'facecolor':'blue','alpha':0.2})
plt.show()

### Test Preparation Course

In [None]:
# Column names kept as a note: 'test preparation course', 'math score', 'reading score','writing score'
# Number of students per test-preparation-course status.
df['test preparation course'].value_counts()

In [None]:
# Percentage distribution of the test preparation course status
prep_counts = df['test preparation course'].value_counts()

plt.figure(figsize=(15,8))
# One explode offset per slice, derived from the data rather than a fixed 2,
# because matplotlib rejects an explode list whose length differs from the slices.
prep_counts.plot.pie(y='test preparation course',autopct="%1.1f%%",
                     cmap='Pastel2',startangle=90,fontsize=13,
                     explode=[0.05]*len(prep_counts),shadow=True)
plt.title("Complete Test preparation course distribution",fontsize=14,bbox={'facecolor':'blue','alpha':0.2})
plt.ylabel("")  # hide the default y-axis label next to the pie
plt.show()

In [None]:
# Test preparation course status, separated by gender
prep_order = df['test preparation course'].value_counts().index

plt.figure(figsize=(12,8))
sns.countplot(
    x='test preparation course',
    hue='gender',
    data=df,
    order=prep_order,
    palette='Pastel1',
)
plt.show()

# All Test Scores

In [None]:
# Reshape the three score columns to long form ("variable"/"value") so that
# a single boxplot call can draw one box per subject.
scores_long = df[['math score', 'reading score','writing score']].melt()

plt.figure(figsize=(15,8))
sns.boxplot(data=scores_long, x="variable", y="value", palette='viridis')
plt.xlabel("")
plt.ylabel("Test Score",fontsize=14)
plt.xticks(fontsize=13)
plt.yticks(fontsize=13)
plt.show()

In [None]:
# If I were to remove Outlier...
def removeOutlier(feature, new_df, factor=1.5):
    """Return the rows of ``new_df`` whose ``feature`` value is not an IQR outlier.

    The fences are ``Q1 - factor * IQR`` and ``Q3 + factor * IQR``. Values
    lying exactly on a fence are kept: only values strictly beyond a fence
    count as outliers. This also means a column with zero IQR (e.g. a
    constant column) keeps its rows instead of being emptied.

    Parameters
    ----------
    feature : str
        Name of the numeric column to filter on.
    new_df : pandas.DataFrame
        Frame to filter; it is not modified.
    factor : float, default 1.5
        Multiple of the IQR used to place the fences.

    Returns
    -------
    pandas.DataFrame
        The rows of ``new_df`` inside the fences (rows with a missing
        ``feature`` value are dropped, since they compare as outside).
    """
    values = new_df[feature]
    p25 = values.quantile(0.25)
    p75 = values.quantile(0.75)
    iqr = p75 - p25

    lower_limit = p25 - factor * iqr
    upper_limit = p75 + factor * iqr

    # Series.between is inclusive on both ends by default.
    return new_df[values.between(lower_limit, upper_limit)]

# Filter the outliers of each subject independently; every call starts from the full df.
math_df, reading_df, writing_df = (
    removeOutlier(subject, df)
    for subject in ('math score', 'reading score', 'writing score')
)

fig, (axm, axr, axw) = plt.subplots(3, 1, sharex='col', figsize=(12, 8))

# (axes, filtered frame, score column, box colour, y-axis label) for each panel
box_panels = (
    (axm, math_df, 'math score', 'pink', 'Math Score'),
    (axr, reading_df, 'reading score', 'darkturquoise', 'Reading Score'),
    (axw, writing_df, 'writing score', 'tomato', 'Writing Score'),
)
for ax, frame, column, colour, label in box_panels:
    sns.boxplot(data=frame, x=column, ax=ax, color=colour)
    ax.set_xlabel("")
    ax.set_ylabel(label, fontsize=14)

plt.xticks(fontsize=13)
plt.tight_layout()
plt.show()

### Score Correlation

In [None]:
# Pairwise correlation between the three test scores
df[['math score', 'reading score','writing score']].corr()

In [None]:
# Correlation - Heatmap
# Restrict to the score columns explicitly: calling corr() on the whole frame
# fails on recent pandas versions because the other columns hold strings.
score_corr = df[['math score', 'reading score','writing score']].corr()

# Boolean mask hiding the upper triangle (diagonal included). Building it from
# ones rather than from the correlation values keeps a coefficient of exactly 0
# from being left unmasked.
upper_triangle = np.triu(np.ones_like(score_corr, dtype=bool))

plt.figure(dpi=100)
sns.heatmap(score_corr,mask=upper_triangle,annot=True,linewidth=3,linecolor='white',cmap="YlGnBu")
plt.yticks(rotation=0)
plt.show()

In [None]:
# Score and Gender
# pairplot is a figure-level function that creates its own figure, so no
# plt.figure() call precedes it (that would only emit an extra empty figure).
sns.pairplot(df,hue='gender',diag_kind="hist",markers=["o","D"],palette='Set1')
plt.show()

In [None]:
# Score and Test preparation course
# pairplot is a figure-level function that creates its own figure, so no
# plt.figure() call precedes it (that would only emit an extra empty figure).
sns.pairplot(df,hue='test preparation course',diag_kind="hist",markers=["o","D"],palette='Set2_r')
plt.show()

### Test Preparation Course and Test Score

In [None]:
# Score histograms split by test preparation course, one panel per subject
fig, (axm, axr, axw) = plt.subplots(3, 1, sharex='col', figsize=(15, 8))

# (axes, score column, palette, panel title)
hist_panels = (
    (axm, 'math score', 'viridis_r', 'Math Score'),
    (axr, 'reading score', 'inferno', 'Reading Score'),
    (axw, 'writing score', 'PRGn_r', 'Writing Score'),
)
for ax, column, colours, heading in hist_panels:
    sns.histplot(data=df, x=column, hue='test preparation course', alpha=0.5,
                 palette=colours, multiple='dodge', ax=ax)
    ax.title.set_text(heading)

plt.tight_layout()
plt.show()

### Parental Level of Education and Test Score 

In [None]:
# Reminder of the available parental education levels and their sizes.
df['parental level of education'].value_counts()

In [None]:
# Score densities for three "complete" parental education levels
complete_levels = ["master's degree","high school","bachelor's degree"]
three_education = df[df['parental level of education'].isin(complete_levels)]

fig, (axm, axr, axw) = plt.subplots(3, 1, sharex='col', figsize=(15, 8))

# (axes, score column, palette, panel title)
kde_panels = (
    (axm, 'math score', 'bright', 'Math Score'),
    (axr, 'reading score', 'Set2', 'Reading Score'),
    (axw, 'writing score', 'Paired', 'Writing Score'),
)
for ax, column, colours, heading in kde_panels:
    sns.kdeplot(data=three_education, x=column, hue='parental level of education',
                linewidth=0, fill=True, palette=colours, alpha=0.5, ax=ax)
    ax.title.set_text(heading)

plt.xlabel("")
plt.tight_layout()
plt.show()

In [None]:
# Score densities for the three remaining ("incomplete") parental education levels
incomplete_levels = ["some college","associate's degree","some high school"]
three_education = df[df['parental level of education'].isin(incomplete_levels)]

fig, (axm, axr, axw) = plt.subplots(3, 1, sharex='col', figsize=(15, 8))

# (axes, score column, palette, panel title)
kde_panels = (
    (axm, 'math score', 'bright', 'Math Score'),
    (axr, 'reading score', 'Set2', 'Reading Score'),
    (axw, 'writing score', 'hls', 'Writing Score'),
)
for ax, column, colours, heading in kde_panels:
    sns.kdeplot(data=three_education, x=column, hue='parental level of education',
                linewidth=0, fill=True, palette=colours, alpha=0.5, ax=ax)
    ax.title.set_text(heading)

plt.xlabel("")
plt.tight_layout()
plt.show()

### Gender and Score

In [None]:
# Score densities by gender; common_norm=False normalises each gender's curve separately
fig, (axm, axr, axw) = plt.subplots(3, 1, sharex='col', figsize=(15, 8))

# (axes, score column, palette, panel title)
kde_panels = (
    (axm, 'math score', 'crest', 'Math Score'),
    (axr, 'reading score', 'magma', 'Reading Score'),
    (axw, 'writing score', 'YlGn_r', 'Writing Score'),
)
for ax, column, colours, heading in kde_panels:
    sns.kdeplot(data=df, x=column, hue='gender', fill=True,
                common_norm=False, palette=colours, alpha=0.5, linewidth=0, ax=ax)
    ax.title.set_text(heading)

plt.xlabel("")
plt.tight_layout()
plt.show()

### Parental Level Education and Average Test Score

In [None]:
# Average Score by Parental level education - Approach 1 (explicit loop and prints)
print("Average Score by Parental Level Education\n")

# (printed label, score column)
score_labels = (
    ('Math score: ', 'math score'),
    ('Reading score: ', 'reading score'),
    ('Writing score: ', 'writing score'),
)
for level in df['parental level of education'].value_counts().index:
    level_rows = df[df['parental level of education'] == level]
    print(level.capitalize(),": ")
    for label, column in score_labels:
        print(label, round(level_rows[column].mean(), 2))
    print()

In [None]:
# Average Score by Parental level education - Approach 2 (groupby)
score_columns = ['math score','reading score','writing score']
avr_score_edu = df.groupby('parental level of education')[score_columns].mean()
avr_score_edu

In [None]:
# Education levels ranked by average math score, highest first.
avr_score_edu['math score'].sort_values(ascending=False)

In [None]:
# Education levels ranked by average reading score, highest first.
avr_score_edu['reading score'].sort_values(ascending=False)

In [None]:
# Education levels ranked by average writing score, highest first.
avr_score_edu['writing score'].sort_values(ascending=False)

In [None]:
# Grouped bar chart: one bar per subject for every parental education level.
avr_score_edu.plot.bar(figsize=(15,8), cmap='viridis')
plt.ylabel("Average Score")
plt.show()

### Parental Level of Education (Master's degree)

In [None]:
# Students whose parents hold a master's degree.
has_master_parent = df['parental level of education'] == "master's degree"
master_df = df[has_master_parent]
master_df.head()

In [None]:
# Gender Distribution (Parents have Master's degree)
master_gender_counts = master_df['gender'].value_counts()

plt.figure(figsize=(15,8))
# The subset may not contain every category, so the explode list is sized
# from the counts; matplotlib requires exactly one offset per slice.
master_gender_counts.plot.pie(y='gender',autopct="%1.1f%%",startangle=90,
                              cmap='icefire',fontsize=13,
                              explode=[0.05]*len(master_gender_counts),shadow=True)
plt.ylabel("")  # hide the default y-axis label next to the pie
plt.title("Gender Distribution (Parents have Master's degree)",fontsize=14,bbox={'facecolor':'blue','alpha':0.2})
plt.show()

In [None]:
# Race/Ethnicity Distribution (Parents have Master's degree)
master_race_counts = master_df['race/ethnicity'].value_counts()

plt.figure(figsize=(15,8))
# A filtered subset is not guaranteed to contain all five groups, so the
# explode list is sized from the counts instead of a hard-coded 5; matplotlib
# raises a ValueError when the lengths differ.
master_race_counts.plot.pie(y='race/ethnicity',autopct="%1.1f%%",startangle=90,
                            cmap='Set2_r',fontsize=13,
                            explode=[0.08]*len(master_race_counts),shadow=True)
plt.ylabel("")  # hide the default y-axis label next to the pie
plt.title("Race/Ethnicity Distribution (Parents have Master's degree)",fontsize=14,bbox={'facecolor':'blue','alpha':0.2})
plt.show()

In [None]:
# Preparation for the test (Parents have Master's degree)
master_prep_counts = master_df['test preparation course'].value_counts()

plt.figure(figsize=(15,8))
# The subset may not contain every category, so the explode list is sized
# from the counts; matplotlib requires exactly one offset per slice.
master_prep_counts.plot.pie(y='test preparation course',autopct="%1.1f%%",startangle=90,
                            cmap='Pastel2',fontsize=13,
                            explode=[0.05]*len(master_prep_counts),shadow=True)
plt.ylabel("")  # hide the default y-axis label next to the pie
plt.title("Test Preparation Course (Parents have Master's degree)",fontsize=14,bbox={'facecolor':'blue','alpha':0.2})
plt.show()

In [None]:
# Race/Ethnicity and Test Preparation Course (parents with a master's degree)
plt.figure(figsize=(15,8))
ax = sns.countplot(data=master_df,x='race/ethnicity',hue='test preparation course',palette='Set2_r')

# Write the count above every bar, centred on the bar. Patches without a
# positive height are skipped: empty category/hue combinations (NaN or 0
# height) and any zero-sized patches present in ax.patches would otherwise
# be labelled "nan" or "0".
for bar in ax.patches:
    height = bar.get_height()
    if not height > 0:
        continue
    ax.annotate('{:.0f}'.format(height),
                (bar.get_x() + bar.get_width() / 2, height + 0.2),
                ha='center', fontsize=12)

plt.show()

In [None]:
# Mean score per race/ethnicity group among students whose parents hold a master's degree.
master_by_race = master_df.groupby('race/ethnicity')
math_race = master_by_race['math score'].mean().to_frame()
reading_race = master_by_race['reading score'].mean().to_frame()
writing_race = master_by_race['writing score'].mean().to_frame()

fig, (axm, axr, axw) = plt.subplots(3, 1, figsize=(15, 12))

# (axes, per-group means, score column, palette); horizontal bars, one per group
bar_panels = (
    (axm, math_race, 'math score', 'Set2'),
    (axr, reading_race, 'reading score', 'magma'),
    (axw, writing_race, 'writing score', 'Pastel2_r'),
)
for ax, means, column, colours in bar_panels:
    sns.barplot(data=means, y=means.index, x=column, palette=colours, ax=ax)

plt.tight_layout()
plt.show()

# Evaluate Students "Letter" Grade

In [None]:
# Work on a copy so the original frame keeps its columns unchanged.
new_df = df.copy()

# New column: each student's mean over the three subject scores
score_sum = new_df['math score'] + new_df['reading score'] + new_df['writing score']
new_df['Average Total Score'] = score_sum / 3

# Grading Scale I got from the internet
def letterGrade(score):
    """Map a numeric score to a letter grade.

    Scale: A for 80 and above, B for 65 and above, C for 55 and above,
    D for 50 and above, F for anything else.
    """
    # Minimum scores listed from the highest grade down; the first match wins.
    cutoffs = ((80, 'A'), (65, 'B'), (55, 'C'), (50, 'D'))
    for minimum, letter in cutoffs:
        if score >= minimum:
            return letter
    return 'F'

# Derive each student's letter grade from the average score
average_scores = new_df['Average Total Score']
new_df['Letter Grade'] = average_scores.apply(letterGrade)
new_df.head()

In [None]:
# Letter Grade and Test Preparation Course
plt.figure(figsize=(15,8))
ax = sns.countplot(data=new_df,x='Letter Grade',hue='test preparation course',palette='Set3_r',
             order = ['A','B','C','D','F'])

# Write the count above every bar, centred on the bar. Patches without a
# positive height are skipped: empty grade/hue combinations (NaN or 0 height)
# and any zero-sized patches present in ax.patches would otherwise be
# labelled "nan" or "0".
for bar in ax.patches:
    height = bar.get_height()
    if not height > 0:
        continue
    ax.annotate('{:.0f}'.format(height),
                (bar.get_x() + bar.get_width() / 2, height + 0.5),
                ha='center', fontsize=12)

plt.show()

In [None]:
# Letter Grade and Gender
plt.figure(figsize=(15,8))
ax = sns.countplot(data=new_df,x='Letter Grade',hue='gender',palette='coolwarm',
             order = ['A','B','C','D','F'])

# Write the count above every bar, centred on the bar. Patches without a
# positive height are skipped: empty grade/hue combinations (NaN or 0 height)
# and any zero-sized patches present in ax.patches would otherwise be
# labelled "nan" or "0".
for bar in ax.patches:
    height = bar.get_height()
    if not height > 0:
        continue
    ax.annotate('{:.0f}'.format(height),
                (bar.get_x() + bar.get_width() / 2, height + 0.5),
                ha='center', fontsize=12)

plt.show()

In [None]:
# Letter Grade and Parental level of education
plt.figure(figsize=(15,8))
ax = sns.countplot(data=new_df,x='Letter Grade',hue='parental level of education',palette='rainbow_r',
             order = ['A','B','C','D','F'])

# Write the count above every bar, centred on the bar. With six hue levels a
# grade/education combination may have no students; such patches (NaN or 0
# height) and any zero-sized patches present in ax.patches are skipped so
# they are not labelled "nan" or "0".
for bar in ax.patches:
    height = bar.get_height()
    if not height > 0:
        continue
    ax.annotate('{:.0f}'.format(height),
                (bar.get_x() + bar.get_width() / 2, height + 0.5),
                ha='center', fontsize=12)

plt.show()

In [None]:
# Letter Grade and Race/Ethnicity
plt.figure(figsize=(15,8))
ax = sns.countplot(data=new_df,x='Letter Grade',hue='race/ethnicity',palette='gist_ncar_r',
             order = ['A','B','C','D','F'])

# Write the count above every bar, centred on the bar. Patches without a
# positive height are skipped: empty grade/group combinations (NaN or 0
# height) and any zero-sized patches present in ax.patches would otherwise
# be labelled "nan" or "0".
for bar in ax.patches:
    height = bar.get_height()
    if not height > 0:
        continue
    ax.annotate('{:.0f}'.format(height),
                (bar.get_x() + bar.get_width() / 2, height + 0.5),
                ha='center', fontsize=12)

plt.show()

In [None]:
# Letter Grade and lunch
plt.figure(figsize=(15,8))
ax = sns.countplot(data=new_df,x='Letter Grade',hue='lunch',palette='viridis_r',
             order = ['A','B','C','D','F'])

# Write the count above every bar, centred on the bar. Patches without a
# positive height are skipped: empty grade/lunch combinations (NaN or 0
# height) and any zero-sized patches present in ax.patches would otherwise
# be labelled "nan" or "0".
for bar in ax.patches:
    height = bar.get_height()
    if not height > 0:
        continue
    ax.annotate('{:.0f}'.format(height),
                (bar.get_x() + bar.get_width() / 2, height + 1),
                ha='center', fontsize=12)

plt.show()

# Thank you for checking out my notebook!