## Section 1: Importing required libraries


In [1]:
import pandas as pd
import numpy as np

# Record the library versions so the results below can be reproduced.
print(f"Pandas version: {pd.__version__}")
print(f"NumPy version: {np.__version__}")


Pandas version: 2.2.2
NumPy version: 1.26.4


## Section 2: Reading the data set

In [2]:
# Load the student records from the CSV file in the working directory
# and preview the first five rows.
df = pd.read_csv(filepath_or_buffer="StudentsPerformance.csv")
df.head(n=5)

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
0,female,group B,bachelor's degree,standard,none,72,72,74
1,female,group C,some college,standard,completed,69,90,88
2,female,group B,master's degree,standard,none,90,95,93
3,male,group A,associate's degree,free/reduced,none,47,57,44
4,male,group C,some college,standard,none,76,78,75


## Section 3: Data set inspection

In [3]:
# Dimensions of the data set as (number of rows, number of columns).
df.shape

(1000, 8)

In [4]:
# Column labels available in the data set.
df.columns

Index(['gender', 'race/ethnicity', 'parental level of education', 'lunch',
       'test preparation course', 'math score', 'reading score',
       'writing score'],
      dtype='object')

In [5]:
# Data type of each column: text columns show as object, the three scores as int64.
df.dtypes

gender                         object
race/ethnicity                 object
parental level of education    object
lunch                          object
test preparation course        object
math score                      int64
reading score                   int64
writing score                   int64
dtype: object

## Section 4: Descriptive Statistics

In [6]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric score columns.
df.describe()

Unnamed: 0,math score,reading score,writing score
count,1000.0,1000.0,1000.0
mean,66.089,69.169,68.054
std,15.16308,14.600192,15.195657
min,0.0,17.0,10.0
25%,57.0,59.0,57.75
50%,66.0,70.0,69.0
75%,77.0,79.0,79.0
max,100.0,100.0,100.0


In [7]:
# Most frequent value of every column (categorical and numeric alike).
df.mode()

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
0,female,group C,some college,standard,none,65,72,74


## Section 5: NumPy-Based Numerical Analysis



In [8]:
# Keep only the numeric columns (the three score columns).
numeric_df = df.select_dtypes(include="number")

In [9]:
# Convert the numeric columns to a 2-D NumPy array (one row per student, one column per score).
numeric_array = numeric_df.to_numpy()

In [10]:
# Grand mean over every score in the array (no axis given, so all values are pooled).
numeric_array.mean()


67.77066666666667

In [11]:
# Lowest single score across all three subjects.
numeric_array.min()

0

In [12]:
# Highest single score across all three subjects.
numeric_array.max()

100

In [13]:
# Mean per column (axis=0 collapses the rows), i.e. one average per score column.
numeric_array.mean(axis=0)

array([66.089, 69.169, 68.054])

## Section 6: Slicing

## 1. Slicing by Label

In [14]:
# Select every row of a single column; passing the label inside a list
# keeps the result a DataFrame rather than a Series.
slicing1 = df.loc[:, ['gender']]
slicing1.head()

Unnamed: 0,gender
0,female
1,female
2,female
3,male
4,male


In [15]:
# Select the first three rows of two columns.
# Label-based slices include the end label, so 0:2 yields rows 0, 1 and 2.
slicing2 = df.loc[0:2, ['lunch', 'math score']]
slicing2.head()

Unnamed: 0,lunch,math score
0,standard,72
1,standard,69
2,standard,90


In [16]:
# Build a boolean mask: True where the reading score is greater than 20.
# Note: this produces True/False values, it does not filter any rows.
# To keep only the matching rows, index with the mask instead:
#     df.loc[df['reading score'] > 20]
Slicing3 = df.loc[:, ['reading score']] > 20
Slicing3.head()

Unnamed: 0,reading score
0,True
1,True
2,True
3,True
4,True


## 2. Slicing by Integer Position

In [17]:
# Select the first four rows (positions 0-3) with all columns.
# Position-based slices exclude the end position.
Slicing4 = df.iloc[:4, :]
Slicing4.head()

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
0,female,group B,bachelor's degree,standard,none,72,72,74
1,female,group C,some college,standard,completed,69,90,88
2,female,group B,master's degree,standard,none,90,95,93
3,male,group A,associate's degree,free/reduced,none,47,57,44


## Section 7: Sorting

In [18]:
# Order the rows by their index labels (ascending).
SortedIN = df.sort_index(axis="index")
SortedIN.head()

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
0,female,group B,bachelor's degree,standard,none,72,72,74
1,female,group C,some college,standard,completed,69,90,88
2,female,group B,master's degree,standard,none,90,95,93
3,male,group A,associate's degree,free/reduced,none,47,57,44
4,male,group C,some college,standard,none,76,78,75


In [19]:
# Order the columns by their labels in descending (Z to A) order.
SortedINN = df.sort_index(axis="columns", ascending=False)
SortedINN.head()

Unnamed: 0,writing score,test preparation course,reading score,race/ethnicity,parental level of education,math score,lunch,gender
0,74,none,72,group B,bachelor's degree,72,standard,female
1,88,completed,90,group C,some college,69,standard,female
2,93,none,95,group B,master's degree,90,standard,female
3,44,none,57,group A,associate's degree,47,free/reduced,male
4,75,none,78,group C,some college,76,standard,male


In [20]:
# Order the rows by writing score, lowest first.
SortedINNN = df.sort_values(by="writing score")
SortedINNN.head()

Unnamed: 0,gender,race/ethnicity,parental level of education,lunch,test preparation course,math score,reading score,writing score
59,female,group C,some high school,free/reduced,none,0,17,10
596,male,group B,high school,free/reduced,none,30,24,15
327,male,group A,some college,free/reduced,none,28,23,19
76,male,group E,some high school,standard,none,30,26,22
980,female,group B,high school,free/reduced,none,8,24,23


## Section 8: Exporting to a JSON file

In [21]:
# Write the index-sorted frame to disk as a JSON array with one object
# per row, pretty-printed with a four-space indent.
SortedIN.to_json(path_or_buf="Studenttt.json", orient="records", indent=4)
