# Pandas 
### Series

In [76]:
# How to create a series?

In [77]:
import pandas as pd

In [78]:
# pass a list
series = pd.Series([12,7.5,'Tarun','Karnal'])
series

0        12
1       7.5
2     Tarun
3    Karnal
dtype: object

In [79]:
# pass an index
series= pd.Series([12,7.5,'Tarun','Karnal'],index=['10th marks','12th marks','name','place'])
series

10th marks        12
12th marks       7.5
name           Tarun
place         Karnal
dtype: object

In [80]:
# pass a dictionary
new_series = pd.Series({'name':'Tarun','age':19,'Place':'Karnal'})
new_series

name      Tarun
age          19
Place    Karnal
dtype: object

### Acessing elements in series
##### 1.Using index

In [81]:
new_series['name']

'Tarun'

In [82]:
new_series['age']

19

In [83]:
new_series.index


Index(['name', 'age', 'Place'], dtype='object')

In [84]:
# We  can also use the position
new_series[0]


'Tarun'

In [85]:
new_series

name      Tarun
age          19
Place    Karnal
dtype: object

In [86]:
# How to give column names
new_series.index.age = 'info'
new_series.age = 'Details'
new_series

name       Tarun
age      Details
Place     Karnal
dtype: object

In [87]:
# Looking from Dictionary again
bacteria_dict={'firmicutes':632,'proteobacteria':1638,'actinobacteria':569,'bacterodetes':115}

In [88]:




pd.Series(bacteria_dict)

firmicutes         632
proteobacteria    1638
actinobacteria     569
bacterodetes       115
dtype: int64

In [89]:
bacteria2  = pd.Series(bacteria_dict, index=['cyanobacteria','firmicutes','proteobacteria','actinobacteria'])
bacteria2

cyanobacteria        NaN
firmicutes         632.0
proteobacteria    1638.0
actinobacteria     569.0
dtype: float64

In [90]:
# How to find null values
bacteria2.isnull()

cyanobacteria      True
firmicutes        False
proteobacteria    False
actinobacteria    False
dtype: bool

In [91]:
# Adding 2 series will sum the data with labels
bacteria = pd.Series([632,1638,569,115],index= ['firmicutes','proteobacteria','actinobacteria','bacteroidetes'])
print(bacteria)
bacteria+bacteria2

firmicutes         632
proteobacteria    1638
actinobacteria     569
bacteroidetes      115
dtype: int64


actinobacteria    1138.0
bacteroidetes        NaN
cyanobacteria        NaN
firmicutes        1264.0
proteobacteria    3276.0
dtype: float64

In [92]:
newSeries = bacteria + bacteria2

In [93]:
newSeries.isnull()

actinobacteria    False
bacteroidetes      True
cyanobacteria      True
firmicutes        False
proteobacteria    False
dtype: bool

In [94]:
~newSeries.isnull()

actinobacteria     True
bacteroidetes     False
cyanobacteria     False
firmicutes         True
proteobacteria     True
dtype: bool

In [95]:
newSeries[newSeries.isnull()]

bacteroidetes   NaN
cyanobacteria   NaN
dtype: float64

In [96]:
newSeries[~newSeries.isnull()]

actinobacteria    1138.0
firmicutes        1264.0
proteobacteria    3276.0
dtype: float64

# Dataframes

In [97]:
data = pd.DataFrame({'value':[632, 1638, 569, 115, 433, 1130, 754, 555],
                     'patient':[1, 1, 1, 1, 2, 2, 2, 2],
                     'phylum':['Firmicutes', 'Proteobacteria', 'Actinobacteria', 
    'Bacteroidetes', 'Firmicutes', 'Proteobacteria', 'Actinobacteria', 'Bacteroidetes']})
data

Unnamed: 0,value,patient,phylum
0,632,1,Firmicutes
1,1638,1,Proteobacteria
2,569,1,Actinobacteria
3,115,1,Bacteroidetes
4,433,2,Firmicutes
5,1130,2,Proteobacteria
6,754,2,Actinobacteria
7,555,2,Bacteroidetes


In [98]:
# we can select the columns using [[]] notation

data[['phylum','value']]

Unnamed: 0,phylum,value
0,Firmicutes,632
1,Proteobacteria,1638
2,Actinobacteria,569
3,Bacteroidetes,115
4,Firmicutes,433
5,Proteobacteria,1130
6,Actinobacteria,754
7,Bacteroidetes,555


In [99]:
data[['phylum']] # this a dataframe

Unnamed: 0,phylum
0,Firmicutes
1,Proteobacteria
2,Actinobacteria
3,Bacteroidetes
4,Firmicutes
5,Proteobacteria
6,Actinobacteria
7,Bacteroidetes


In [100]:
data['phylum'] # this is a series

0        Firmicutes
1    Proteobacteria
2    Actinobacteria
3     Bacteroidetes
4        Firmicutes
5    Proteobacteria
6    Actinobacteria
7     Bacteroidetes
Name: phylum, dtype: object

In [101]:
print(type(data[['phylum']]))
print(type(data['phylum']))

<class 'pandas.core.frame.DataFrame'>
<class 'pandas.core.series.Series'>


In [102]:
data.index

RangeIndex(start=0, stop=8, step=1)

In [103]:
data.columns

Index(['value', 'patient', 'phylum'], dtype='object')

In [104]:
# We can also access column by using attribute
data.value

0     632
1    1638
2     569
3     115
4     433
5    1130
6     754
7     555
Name: value, dtype: int64

## In series when we used the indexing  we got a row but in DataFrame we get column

# How to get rows?

In [105]:
print(data)

   value  patient          phylum
0    632        1      Firmicutes
1   1638        1  Proteobacteria
2    569        1  Actinobacteria
3    115        1   Bacteroidetes
4    433        2      Firmicutes
5   1130        2  Proteobacteria
6    754        2  Actinobacteria
7    555        2   Bacteroidetes


In [106]:
data.iloc[2]  # iloc is used for indexed location


value                 569
patient                 1
phylum     Actinobacteria
Name: 2, dtype: object

In [107]:
data.loc[4] # loc is used for label based or name based location

value             433
patient             2
phylum     Firmicutes
Name: 4, dtype: object

# Create or modify columns by assignment

In [108]:
data['value'][3] =1000

A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: http://pandas.pydata.org/pandas-docs/stable/indexing.html#indexing-view-versus-copy
  """Entry point for launching an IPython kernel.


In [109]:
data.value # above warning tells that changes may or may not take using assignment in the data

0     632
1    1638
2     569
3    1000
4     433
5    1130
6     754
7     555
Name: value, dtype: int64

In [110]:
# add new column

In [111]:
import numpy as np
data['new_column']=np.arange(1,80,10)

In [112]:
data

Unnamed: 0,value,patient,phylum,new_column
0,632,1,Firmicutes,1
1,1638,1,Proteobacteria,11
2,569,1,Actinobacteria,21
3,1000,1,Bacteroidetes,31
4,433,2,Firmicutes,41
5,1130,2,Proteobacteria,51
6,754,2,Actinobacteria,61
7,555,2,Bacteroidetes,71


In [113]:
# del can be used to delete the columns
del data['new_column']
data

Unnamed: 0,value,patient,phylum
0,632,1,Firmicutes
1,1638,1,Proteobacteria
2,569,1,Actinobacteria
3,1000,1,Bacteroidetes
4,433,2,Firmicutes
5,1130,2,Proteobacteria
6,754,2,Actinobacteria
7,555,2,Bacteroidetes


In [114]:
# how to get numpy array from a DataFrame
data.values

array([[632, 1, 'Firmicutes'],
       [1638, 1, 'Proteobacteria'],
       [569, 1, 'Actinobacteria'],
       [1000, 1, 'Bacteroidetes'],
       [433, 2, 'Firmicutes'],
       [1130, 2, 'Proteobacteria'],
       [754, 2, 'Actinobacteria'],
       [555, 2, 'Bacteroidetes']], dtype=object)

In [115]:
data.index

RangeIndex(start=0, stop=8, step=1)

In [116]:
data.reindex(data.index[::-1])

Unnamed: 0,value,patient,phylum
7,555,2,Bacteroidetes
6,754,2,Actinobacteria
5,1130,2,Proteobacteria
4,433,2,Firmicutes
3,1000,1,Bacteroidetes
2,569,1,Actinobacteria
1,1638,1,Proteobacteria
0,632,1,Firmicutes


In [117]:
# give index less than length 
data.reindex(data.index[1:5])

Unnamed: 0,value,patient,phylum
1,1638,1,Proteobacteria
2,569,1,Actinobacteria
3,1000,1,Bacteroidetes
4,433,2,Firmicutes


In [118]:
data.reindex(data.index[1:5:2])

Unnamed: 0,value,patient,phylum
1,1638,1,Proteobacteria
3,1000,1,Bacteroidetes


In [119]:
data

Unnamed: 0,value,patient,phylum
0,632,1,Firmicutes
1,1638,1,Proteobacteria
2,569,1,Actinobacteria
3,1000,1,Bacteroidetes
4,433,2,Firmicutes
5,1130,2,Proteobacteria
6,754,2,Actinobacteria
7,555,2,Bacteroidetes


In [123]:
# Delete rows
data = data.drop([3])

In [124]:
data

Unnamed: 0,value,patient,phylum
0,632,1,Firmicutes
1,1638,1,Proteobacteria
4,433,2,Firmicutes
6,754,2,Actinobacteria
7,555,2,Bacteroidetes


In [126]:
#numpy style slicing
data[:3]

Unnamed: 0,value,patient,phylum
0,632,1,Firmicutes
1,1638,1,Proteobacteria
4,433,2,Firmicutes


In [127]:
data.iloc[:3,:]

Unnamed: 0,value,patient,phylum
0,632,1,Firmicutes
1,1638,1,Proteobacteria
4,433,2,Firmicutes


In [128]:
# selecting rows and columns 
data.iloc[2:3,1:3]

Unnamed: 0,patient,phylum
4,2,Firmicutes


In [131]:
data.iloc[2:3,2:10] # in this slicing 10  is not throwing error by using 10 it print all present columns

Unnamed: 0,phylum
4,Firmicutes


In [132]:
data

Unnamed: 0,value,patient,phylum
0,632,1,Firmicutes
1,1638,1,Proteobacteria
4,433,2,Firmicutes
6,754,2,Actinobacteria
7,555,2,Bacteroidetes


In [137]:
# get values using names

data.iloc[:,'patient':'value']  ## DOUBT : why slicing is not happening

TypeError: cannot do slice indexing on <class 'pandas.core.indexes.base.Index'> with these indexers [patient] of <class 'str'>

In [138]:
data

Unnamed: 0,value,patient,phylum
0,632,1,Firmicutes
1,1638,1,Proteobacteria
4,433,2,Firmicutes
6,754,2,Actinobacteria
7,555,2,Bacteroidetes


In [139]:
# Sorting values

data.sort_values(ascending=False,by ='value')

Unnamed: 0,value,patient,phylum
1,1638,1,Proteobacteria
6,754,2,Actinobacteria
0,632,1,Firmicutes
7,555,2,Bacteroidetes
4,433,2,Firmicutes


In [140]:
data.sort_values(ascending=False,by='patient')

Unnamed: 0,value,patient,phylum
4,433,2,Firmicutes
6,754,2,Actinobacteria
7,555,2,Bacteroidetes
0,632,1,Firmicutes
1,1638,1,Proteobacteria


In [141]:
data.sort_values('patient')

Unnamed: 0,value,patient,phylum
0,632,1,Firmicutes
1,1638,1,Proteobacteria
4,433,2,Firmicutes
6,754,2,Actinobacteria
7,555,2,Bacteroidetes


In [142]:
data.sort_values(by='patient')

Unnamed: 0,value,patient,phylum
0,632,1,Firmicutes
1,1638,1,Proteobacteria
4,433,2,Firmicutes
6,754,2,Actinobacteria
7,555,2,Bacteroidetes


In [144]:
data.sort_values() # by argument in sort_values() is necessary

TypeError: sort_values() missing 1 required positional argument: 'by'

In [145]:
data.sort_values(by=['patient','phylum'],ascending=[False,True])

Unnamed: 0,value,patient,phylum
6,754,2,Actinobacteria
7,555,2,Bacteroidetes
4,433,2,Firmicutes
0,632,1,Firmicutes
1,1638,1,Proteobacteria


In [146]:
data

Unnamed: 0,value,patient,phylum
0,632,1,Firmicutes
1,1638,1,Proteobacteria
4,433,2,Firmicutes
6,754,2,Actinobacteria
7,555,2,Bacteroidetes


In [147]:
data.reset_index(drop=True)

Unnamed: 0,value,patient,phylum
0,632,1,Firmicutes
1,1638,1,Proteobacteria
2,433,2,Firmicutes
3,754,2,Actinobacteria
4,555,2,Bacteroidetes


In [148]:
# We can also use data.set_index('column_name')

In [149]:
data.set_index('value')

Unnamed: 0_level_0,patient,phylum
value,Unnamed: 1_level_1,Unnamed: 2_level_1
632,1,Firmicutes
1638,1,Proteobacteria
433,2,Firmicutes
754,2,Actinobacteria
555,2,Bacteroidetes


In [150]:
data.drop('value',axis=1,inplace=True)

In [151]:
data

Unnamed: 0,patient,phylum
0,1,Firmicutes
1,1,Proteobacteria
4,2,Firmicutes
6,2,Actinobacteria
7,2,Bacteroidetes
