# Pandas
A library for working with tabular data, i.e. for storing and manipulating data in the form of tables.

### Install

In [1]:
!pip install pandas



In [2]:
import numpy as np
import pandas as pd

##### Create a data frame

In [3]:
# Build three columns of marks, each holding 5 random integers drawn from [10, 100).
mark_columns = ("MarksA", "MarksB", "MarksC")
user_data = {column: np.random.randint(10, 100, 5) for column in mark_columns}

print(user_data)

{'MarksA': array([63, 23, 64, 73, 87]), 'MarksB': array([94, 83, 61, 22, 31]), 'MarksC': array([91, 76, 54, 98, 77])}


In [4]:
# Standalone demo of the generator used above: 5 random integers, low inclusive, high exclusive.
np.random.randint(low=10, high=100, size=5)

array([18, 88, 34, 11, 85])

## Suppose I want to create a table

In [5]:
# Each dictionary key becomes a column name and each array becomes that column's values.
df = pd.DataFrame(data=user_data)
print(df)

   MarksA  MarksB  MarksC
0      63      94      91
1      23      83      76
2      64      61      54
3      73      22      98
4      87      31      77


In [6]:
# Show the first five rows as a rendered table (five is the default for head()).
df.head(n=5)

Unnamed: 0,MarksA,MarksB,MarksC
0,63,94,91
1,23,83,76
2,64,61,54
3,73,22,98
4,87,31,77


In [7]:
# Limit the preview to the first three rows.
df.head(3)

Unnamed: 0,MarksA,MarksB,MarksC
0,63,94,91
1,23,83,76
2,64,61,54


In [8]:
# List the column labels of the DataFrame.
df.columns

Index(['MarksA', 'MarksB', 'MarksC'], dtype='object')

### To Export Data Into CSV Files

In [9]:
# Export the DataFrame to a CSV file.
# General form: df.to_csv('filename.csv')
# No `index` argument is passed here, so the row index is written as an extra first column.
df.to_csv('marks.csv')

In [10]:
# Read the exported file back from disk into a new DataFrame.
my_data = pd.read_csv(filepath_or_buffer='marks.csv')

In [11]:
# Plain-text dump of the frame that was read back from the CSV file.
print(my_data)

   Unnamed: 0  MarksA  MarksB  MarksC
0           0      63      94      91
1           1      23      83      76
2           2      64      61      54
3           3      73      22      98
4           4      87      31      77


In [12]:
# The CSV was written with its row index, which comes back as an 'Unnamed: 0' column
# that is not needed here.
# errors='ignore' keeps this cell re-runnable: once the column is gone, a second
# run is a no-op instead of raising KeyError.
my_data = my_data.drop(columns=['Unnamed: 0'], errors='ignore')
print(my_data)

   MarksA  MarksB  MarksC
0      63      94      91
1      23      83      76
2      64      61      54
3      73      22      98
4      87      31      77


### To Get Statistics of the Data

In [13]:
# Summary statistics for every numeric column: count, mean, std, min, quartiles and max.
my_data.describe()

Unnamed: 0,MarksA,MarksB,MarksC
count,5.0,5.0,5.0
mean,62.0,58.2,79.2
std,23.832751,31.4436,16.902663
min,23.0,22.0,54.0
25%,63.0,31.0,76.0
50%,64.0,61.0,77.0
75%,73.0,83.0,91.0
max,87.0,94.0,98.0


In [34]:
# Show the last five rows (five is the default for tail()).
my_data.tail(n=5)

Unnamed: 0,MarksA,MarksB,MarksC
0,63,94,91
1,23,83,76
2,64,61,54
3,73,22,98
4,87,31,77


In [15]:
# Select a single row by integer position: position 3 is the fourth row.
df.iloc[3]

MarksA    73
MarksB    22
MarksC    98
Name: 3, dtype: int32

In [16]:
# Select a single value by integer position: row 3, column 1 (both zero-based).
df.iloc[3,1]

22

In [17]:
# Two-step positional lookup: take row 3 first, then the element at position 1.
# The second step uses .iloc explicitly because the row is a Series labelled by
# column name, and plain [1] on such a Series is a deprecated positional fallback.
df.iloc[3].iloc[1]

22

In [18]:
# Translate the column labels into their integer positions.
idx = [df.columns.get_loc(label) for label in ('MarksB', 'MarksC')]
print(idx)
# Row at position 3 (the fourth row), restricted to the column positions found above.
df.iloc[3, idx]

[1, 2]


MarksB    22
MarksC    98
Name: 3, dtype: int32

In [19]:
# First three rows, restricted to the column positions stored in idx.
df.iloc[0:3, idx]

Unnamed: 0,MarksB,MarksC
0,94,91
1,83,76
2,61,54


In [20]:
# First three rows, columns at positions 1 and 2, with the positions written out literally.
df.iloc[:3,[1,2]]

Unnamed: 0,MarksB,MarksC
0,94,91
1,83,76
2,61,54


In [21]:
# Order the rows by MarksA, highest mark first.
my_data.sort_values(by="MarksA", ascending=False)

Unnamed: 0,MarksA,MarksB,MarksC
4,87,31,77
3,73,22,98
2,64,61,54
0,63,94,91
1,23,83,76


In [22]:
# Order the rows by MarksA, lowest mark first (ascending order is the default).
my_data.sort_values(by="MarksA")

Unnamed: 0,MarksA,MarksB,MarksC
1,23,83,76
0,63,94,91
2,64,61,54
3,73,22,98
4,87,31,77


In [23]:
# Rank by MarksC first and fall back to MarksA for ties, highest marks first in both.
sort_keys = ["MarksC", "MarksA"]
my_data.sort_values(by=sort_keys, ascending=False)

Unnamed: 0,MarksA,MarksB,MarksC
3,73,22,98
0,63,94,91
4,87,31,77
1,23,83,76
2,64,61,54


## Pandas Into Numpy Arrays

In [24]:
# Convert the DataFrame to a NumPy array. to_numpy() is the accessor the pandas
# documentation recommends over the older .values attribute.
data_array = my_data.to_numpy()

In [25]:
# Report the type and the (rows, columns) shape of the DataFrame, one per line.
print(type(my_data), my_data.shape, sep="\n")

<class 'pandas.core.frame.DataFrame'>
(5, 3)


In [26]:
# Print the array contents, its type and its shape, one per line.
print(data_array, type(data_array), data_array.shape, sep="\n")

[[63 94 91]
 [23 83 76]
 [64 61 54]
 [73 22 98]
 [87 31 77]]
<class 'numpy.ndarray'>
(5, 3)


## Numpy Arrays Back Into Data Frame

In [27]:
# Wrap the array in a new DataFrame, labelling the columns with subject names.
subject_names = ["Physics", "Chemistry", "Maths"]
new_df = pd.DataFrame(data=data_array, columns=subject_names, dtype='int32')

In [28]:
# Print the DataFrame as plain text.
print(new_df)

   Physics  Chemistry  Maths
0       63         94     91
1       23         83     76
2       64         61     54
3       73         22     98
4       87         31     77


In [29]:
# index=False leaves the row index out of the file, so only the data columns are written.
new_df.to_csv("PCM.csv",index=False)

In [30]:
# To read the documentation for a method, append `?` to it and run the cell, e.g.:
# new_df.to_csv?

In [31]:
# Read PCM.csv back from disk into a DataFrame.
pcm = pd.read_csv(filepath_or_buffer='PCM.csv')

In [32]:
# Print the DataFrame loaded from PCM.csv as plain text.
print(pcm)

   Physics  Chemistry  Maths
0       63         94     91
1       23         83     76
2       64         61     54
3       73         22     98
4       87         31     77


In [33]:
import pandas as pd 
import matplotlib.pyplot as plt 
  
# Raw employee records: one inner list per employee.
data = [
    ['E001', 'M', 34, 123, 'Normal', 350],
    ['E002', 'F', 40, 114, 'Overweight', 450],
    ['E003', 'F', 37, 135, 'Obesity', 169],
    ['E004', 'M', 30, 139, 'Underweight', 189],
    ['E005', 'F', 44, 117, 'Underweight', 183],
    ['E006', 'M', 36, 121, 'Normal', 80],
    ['E007', 'M', 32, 133, 'Obesity', 166],
    ['E008', 'F', 26, 140, 'Normal', 120],
    ['E009', 'M', 32, 133, 'Normal', 75],
    ['E010', 'M', 36, 133, 'Underweight', 40],
]

# Column labels, in the same order as the fields of each record.
employee_columns = ['EMPID', 'Gender', 'Age', 'Sales', 'BMI', 'Income']

# Assemble the labelled table and print it as plain text.
df = pd.DataFrame(data=data, columns=employee_columns)
print(df)
  
# Draw one histogram per numeric column of df.
df.hist() 
  
# Render the figure built by the call above.
plt.show() 

  EMPID Gender  Age  Sales          BMI  Income
0  E001      M   34    123       Normal     350
1  E002      F   40    114   Overweight     450
2  E003      F   37    135      Obesity     169
3  E004      M   30    139  Underweight     189
4  E005      F   44    117  Underweight     183
5  E006      M   36    121       Normal      80
6  E007      M   32    133      Obesity     166
7  E008      F   26    140       Normal     120
8  E009      M   32    133       Normal      75
9  E010      M   36    133  Underweight      40


<Figure size 640x480 with 4 Axes>