# **Pandas**

In [1]:
# importing pandas and numpy
import pandas as pd
import numpy as np 

***Creating dataframes***

In [2]:
# creating a dictionary
dc1 = {
    "Name": ["Shivam", "Ved", "kabir", "Tara"],
    "Age": [19, 22, 23, 22],
    "Marks": [98, 83, 67, 90]
}

In [3]:
# converting that into a dataframes
dataFrame1 = pd.DataFrame(dc1)
dataFrame1

Unnamed: 0,Name,Age,Marks
0,Shivam,19,98
1,Ved,22,83
2,kabir,23,67
3,Tara,22,90


In [4]:
# converting dataframes into numpy array
n_arr = np.array(dataFrame1)
print(n_arr)

[['Shivam' 19 98]
 ['Ved' 22 83]
 ['kabir' 23 67]
 ['Tara' 22 90]]


In [5]:
# converting the dataframe to a csv file
dataFrame1.to_csv("Pandas.csv")

# in case you don't want index then
dataFrame1.to_csv("Pandaswoindx.csv", index=False)

In [6]:
# viewing first n rows
dataFrame1.head(2)      # will show first 2 rows

Unnamed: 0,Name,Age,Marks
0,Shivam,19,98
1,Ved,22,83


In [7]:
# viewing last n rows
dataFrame1.tail(2)      # will show last 2 rows

Unnamed: 0,Name,Age,Marks
2,kabir,23,67
3,Tara,22,90


In [8]:
# showing statistical data of numerical columns
dataFrame1.describe()

Unnamed: 0,Age,Marks
count,4.0,4.0
mean,21.5,84.5
std,1.732051,13.178265
min,19.0,67.0
25%,21.25,79.0
50%,22.0,86.5
75%,22.25,92.0
max,23.0,98.0


In [9]:
# changing a certain value
dataFrame1["Age"][3] = 21
dataFrame1

# error is shown because changing a value in a dataframe will change it in in the memoery location

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  dataFrame1["Age"][3] = 21


Unnamed: 0,Name,Age,Marks
0,Shivam,19,98
1,Ved,22,83
2,kabir,23,67
3,Tara,21,90


In [10]:
# Changing the index values
dataFrame1.index = ["one", "two", "three", "four"]
dataFrame1

Unnamed: 0,Name,Age,Marks
one,Shivam,19,98
two,Ved,22,83
three,kabir,23,67
four,Tara,21,90


In [11]:
# printing the index
dataFrame1.index

Index(['one', 'two', 'three', 'four'], dtype='object')

***Reading csv/excel files and performing operations on it***

In [12]:
# reading an csv/excel sheet
dt = pd.read_excel("students marks.xlsx")
print(dt)

    S.no Name of student Name of college    Class  Subject 1  Subject 2  \
0      1          Shivam             VIPS   BCA 2         94         61   
1      2           Maina             VIPS   BCA 3         66         79   
2      3           Disha             VIPS   BCA 4         75         81   
3      4         Anshika             VIPS   BCA 5         84         62   
4      5        Himanshu             VIPS   BCA 6         97         84   
5      6           Abhay             VIPS   BCA 7         81         61   
6      7          Yogesh             VIPS   BCA 8         78         83   
7      8           Mehak             VIPS   BCA 9         68         79   
8      9            Yash             VIPS  BCA 10         73         68   
9     10         Bhavna              VIPS  BCA 11         86         61   
10    11           Aditi             VIPS  BCA 12         95         74   
11    12           Virat             VIPS  BCA 13         79         62   
12    13           Rohit 

In [13]:
# viewing a specific column
dt["Name of student"]

0       Shivam
1        Maina
2        Disha
3      Anshika
4     Himanshu
5        Abhay
6       Yogesh
7        Mehak
8         Yash
9      Bhavna 
10       Aditi
11       Virat
12       Rohit
13         Ved
14        Tara
15      Aditya
16        Geet
17       Kabir
18        Aman
19       Naina
Name: Name of student, dtype: object

In [14]:
# viewing a specific value
dt["Name of student"][1]

'Maina'

In [15]:
# analyising statistically
dt.describe()

Unnamed: 0,S.no,Subject 1,Subject 2,Subject 3,Subject 4,Total,Percentage
count,20.0,20.0,20.0,20.0,20.0,20.0,20.0
mean,10.5,79.8,76.2,79.45,78.85,314.3,0.78575
std,5.91608,10.267988,11.41375,12.563418,11.240317,22.110012,0.055275
min,1.0,63.0,61.0,60.0,60.0,274.0,0.685
25%,5.75,72.5,65.75,70.5,72.75,299.75,0.749375
50%,10.5,80.0,79.0,78.0,80.0,312.0,0.78
75%,15.25,85.25,83.0,91.25,84.5,330.0,0.825
max,20.0,97.0,99.0,98.0,97.0,357.0,0.8925


In [16]:
# printing datatypes of every column
dt.dtypes

S.no                  int64
Name of student      object
Name of college      object
Class                object
Subject 1             int64
Subject 2             int64
Subject 3             int64
Subject 4             int64
Total                 int64
Percentage          float64
Pass/Fail            object
Grade                object
dtype: object

In [17]:
# making a dataframe with numpy
newd = pd.DataFrame(np.random.rand(50, 5), index= np.arange(1, 51))
newd

Unnamed: 0,0,1,2,3,4
1,0.869508,0.5659,0.200853,0.376252,0.209145
2,0.530973,0.438647,0.715432,0.303449,0.988821
3,0.861492,0.969144,0.218016,0.396,0.35474
4,0.886995,0.909444,0.026132,0.220737,0.877158
5,0.977896,0.042893,0.129585,0.922921,0.549307
6,0.0164,0.343524,0.030535,0.397094,0.250096
7,0.940766,0.699441,0.475686,0.591167,0.001382
8,0.993387,0.489668,0.52732,0.584734,0.899804
9,0.738755,0.591534,0.686979,0.110728,0.620729
10,0.036536,0.4221,0.127753,0.684387,0.822726


In [18]:
# making a series
nser = pd.Series(np.arange(1, 21))
nser

0      1
1      2
2      3
3      4
4      5
5      6
6      7
7      8
8      9
9     10
10    11
11    12
12    13
13    14
14    15
15    16
16    17
17    18
18    19
19    20
dtype: int32

In [19]:
# printing tranpose of a dataframe
dt.T

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19
S.no,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20
Name of student,Shivam,Maina,Disha,Anshika,Himanshu,Abhay,Yogesh,Mehak,Yash,Bhavna,Aditi,Virat,Rohit,Ved,Tara,Aditya,Geet,Kabir,Aman,Naina
Name of college,VIPS,VIPS,VIPS,VIPS,VIPS,VIPS,VIPS,VIPS,VIPS,VIPS,VIPS,VIPS,VIPS,VIPS,VIPS,VIPS,VIPS,VIPS,VIPS,VIPS
Class,BCA 2,BCA 3,BCA 4,BCA 5,BCA 6,BCA 7,BCA 8,BCA 9,BCA 10,BCA 11,BCA 12,BCA 13,BCA 14,BCA 15,BCA 16,BCA 17,BCA 18,BCA 19,BCA 20,BCA 21
Subject 1,94,66,75,84,97,81,78,68,73,86,95,79,95,82,85,63,82,71,75,67
Subject 2,61,79,81,62,84,61,83,79,68,61,74,62,83,78,84,67,79,99,83,96
Subject 3,86,91,77,66,73,66,85,62,72,61,76,73,92,96,91,60,93,98,92,79
Subject 4,60,69,74,81,65,80,80,97,83,66,81,94,61,77,95,95,76,89,80,74
Total,301,305,307,293,319,288,326,306,296,274,326,308,331,333,355,285,330,357,330,316
Percentage,0.7525,0.7625,0.7675,0.7325,0.7975,0.72,0.815,0.765,0.74,0.685,0.815,0.77,0.8275,0.8325,0.8875,0.7125,0.825,0.8925,0.825,0.79


In [20]:
# deleting a column in dataframe 
dt.drop("Subject 2", axis=1)

Unnamed: 0,S.no,Name of student,Name of college,Class,Subject 1,Subject 3,Subject 4,Total,Percentage,Pass/Fail,Grade
0,1,Shivam,VIPS,BCA 2,94,86,60,301,0.7525,Pass,B
1,2,Maina,VIPS,BCA 3,66,91,69,305,0.7625,Pass,B
2,3,Disha,VIPS,BCA 4,75,77,74,307,0.7675,Pass,B
3,4,Anshika,VIPS,BCA 5,84,66,81,293,0.7325,Fail,C
4,5,Himanshu,VIPS,BCA 6,97,73,65,319,0.7975,Pass,B
5,6,Abhay,VIPS,BCA 7,81,66,80,288,0.72,Fail,C
6,7,Yogesh,VIPS,BCA 8,78,85,80,326,0.815,Pass,B
7,8,Mehak,VIPS,BCA 9,68,62,97,306,0.765,Pass,B
8,9,Yash,VIPS,BCA 10,73,72,83,296,0.74,Fail,C
9,10,Bhavna,VIPS,BCA 11,86,61,66,274,0.685,Fail,C


In [21]:
# selecting rows on basis of some conditions 
dt.loc[(dt['Subject 1'] > 90)]

Unnamed: 0,S.no,Name of student,Name of college,Class,Subject 1,Subject 2,Subject 3,Subject 4,Total,Percentage,Pass/Fail,Grade
0,1,Shivam,VIPS,BCA 2,94,61,86,60,301,0.7525,Pass,B
4,5,Himanshu,VIPS,BCA 6,97,84,73,65,319,0.7975,Pass,B
10,11,Aditi,VIPS,BCA 12,95,74,76,81,326,0.815,Pass,B
12,13,Rohit,VIPS,BCA 14,95,83,92,61,331,0.8275,Pass,B


In [22]:
# accessing specific element
dt.at[0, 'Total']

301

In [23]:
# accessing specific element though index
dt.iat[0, 7]

60