## Idiomatic programming
- transforming code into beautiful, idiomatic Python
- replaces traditional index manipulation with Python's looping idioms

In [1]:
# Looping over a range of numbers
# Traditional way: spell out every value by hand
for number in [0, 1, 2, 3, 4, 5]:
    print(number ** 2, end=' ')
print()
# Pythonic way: let range() generate the values
for number in range(6):
    print(number ** 2, end=' ')
    


0 1 4 9 16 25 
0 1 4 9 16 25 

In [6]:
# Looping over a collection (list)
li = [1, 2, 3, 4, 5]
# Traditional way: walk the indices and look each element up
for index in range(len(li)):
    print(li[index], end=' ')
print()
# Pythonic way: iterate over the elements themselves
for item in li:
    print(item, end=' ')

1 2 3 4 5 
1 2 3 4 5 

In [8]:
# Looping backwards over a collection
li = [1, 2, 3, 4, 5]
# Traditional way: count the indices down from the last one to 0
for index in range(len(li) - 1, -1, -1):
    print(li[index], end=' ')
print()
# Pythonic way: reversed() yields the elements from last to first
for item in reversed(li):
    print(item, end=' ')

5 4 3 2 1 
5 4 3 2 1 

In [10]:
# Looping over a collection together with its indices
li = [1, 2, 3, 4, 5]
# Traditional way: loop over the indices and look each value up
for index in range(len(li)):
    print(index, '-->', li[index])
print()
# Pythonic way: enumerate() yields (index, value) pairs
for index, value in enumerate(li):
    print(index, '-->', value)

0 --> 1
1 --> 2
2 --> 3
3 --> 4
4 --> 5

0 --> 1
1 --> 2
2 --> 3
3 --> 4
4 --> 5


In [13]:
# Looping over two collections at once
li = [1, 2, 3, 4, 5]
a = [1, 2, 3, 4, 5, 6, 7, 8]
# Traditional way: index both lists up to the length of the shorter one
n = min(len(li), len(a))
for i in range(n):
    print(li[i], ' ', a[i])
print()
# Pythonic way: zip() pairs the elements and stops at the shorter list.
# The loop variables have their own names so the list `a` is not overwritten.
for left, right in zip(li, a):
    print(left, ' ', right)
    

1   1
2   2
3   3
4   4
5   5

1   1
2   2
3   3
4   4
5   5


In [16]:
# Looping in sorted order (pythonic way)
li = [8, 1, 2, 5, 2, 3]
# ascending: sorted() returns a new list and leaves li untouched
for value in sorted(li):
    print(value, end=' ')
print()
# descending
for value in sorted(li, reverse=True):
    print(value, end=' ')

1 2 2 3 5 8 
8 5 3 2 2 1 

In [20]:
# Looping over a dictionary: iterating the dict itself yields its keys
d = {
    "name": "Lewis",
    "E-mail": "Lhamilton12@gmail.com",
    "Address": "Bronx, NY",
}
for key in d:
    print(key, end=' ')
print()

name E-mail Address 


In [23]:
# Remove every key that starts with 'A'.
# Iterate over a snapshot (list) of the keys: deleting from a dict while looping
# over its live keys() view raises
# "RuntimeError: dictionary changed size during iteration".
for k in list(d.keys()):
    if k.startswith('A'):
        del d[k]

In [25]:
# show the current contents of the dictionary
print(d)

{'name': 'Lewis', 'E-mail': 'Lhamilton12@gmail.com'}


In [28]:
# Looping over the values of the dictionary
for value in d.values():
    print(value, end=' ')

Lewis Lhamilton12@gmail.com 

## list comprehension

In [30]:
# Traditional approach: collect the squares in a list, then add them up
li = []
for n in range(10):
    li.append(n ** 2)
print(sum(li))
print()
# Pythonic way: feed a generator expression straight into sum()
print(sum(n ** 2 for n in range(10)))

285

285


In [32]:
# Build the list of squares with an explicit loop
li = []
for n in range(10):
    li.append(n ** 2)
li

[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]

In [34]:
# The same list built with a list comprehension
com_list = [n ** 2 for n in range(10)]
com_list

[0, 1, 4, 9, 16, 25, 36, 49, 64, 81]

# Print only the even values stored in li
for value in li:
    if value % 2 == 0:
        print(value, end=' ')

In [2]:
# Comprehension with a condition: square only the even numbers below 10
com_list = [n ** 2 for n in range(10) if n % 2 == 0]
com_list

[0, 4, 16, 36, 64]

# Lambda Functions
- an anonymous function is a function that is defined without a name
- every lambda function in Python is defined with the `lambda` keyword
- syntax: `lambda arg: expression`

In [4]:
def square(n):
    """Return n multiplied by itself."""
    result = n * n
    return result
square(10)

100

a = lambda x: x * x    # lambda equivalent of square(): takes x, returns x*x
print(a(10))

## use of lambda with filter 

def filterlist(li):
    """Return a new list holding only the even elements of li, in their original order."""
    return [item for item in li if item % 2 == 0]
li = [1, 2, 3, 4, 5, 6, 7, 8]
filterlist(li)
            

In [11]:
li = [1, 2, 3, 4, 6, 7, 8]
# filter() keeps every element for which the lambda returns True;
# the elements themselves are passed through unmodified
lam_li = list(
    filter(lambda value: value % 2 == 0, li)
)
print(lam_li)

[2, 4, 6, 8]


## lambda with map()
- the `map` function in Python takes a function and a list (or any other iterable) as arguments


In [14]:
def squarelist(li):
    """Return a new list containing the square of every element of li."""
    return [value * value for value in li]
li = [1, 2, 3, 4, 5, 6, 7, 8]
squarelist(li)

[1, 4, 9, 16, 25, 36, 49, 64]

In [20]:
li = [1, 2, 3, 4, 5, 6, 7, 8]
# map() applies the lambda to every element and yields the transformed values
map_list = list(
    map(lambda value: value * value, li)
)
print(map_list)

[1, 4, 9, 16, 25, 36, 49, 64]


## lambda with reduce()


In [22]:
def sumlist(li):
    """Return the sum of the elements of li (0 for an empty list)."""
    total = 0
    for value in li:
        total = total + value
    return total
li = [1, 2, 3, 4, 5, 6, 7, 8]
sumlist(li)

36

In [24]:
from functools import reduce
li = [1, 2, 3, 4, 5, 6, 7, 8]
# reduce() folds the list from left to right: ((1 + 2) + 3) + ... + 8
s = reduce(lambda total, value: total + value, li)
print(s)

36


# Standard Package

### Pandas, NumPy & MatplotLib

#### pandas
##### use cases
- data cleaning
- data transformation
- data analysis
  






In [34]:
import pandas as pd
# A Series is one-dimensional: one labelled value per subject (s1..s3)
internal1 = pd.Series({'s1': 35, 's2': 35, 's3': 35})
internal2 = pd.Series({'s1': 32, 's2': 33, 's3': 34})
print(internal1)
print(internal2)


s1    35
s2    35
s3    35
dtype: int64
s1    32
s2    33
s3    34
dtype: int64


In [35]:
# A DataFrame is two-dimensional (rows and columns); each Series becomes one column
final = pd.DataFrame({'Internal1': internal1, 'Internal2': internal2})
print(final)

    Internal1  Internal2
s1         35         32
s2         35         33
s3         35         34


In [36]:
final.columns                   # returns the column labels (as a pandas Index)

Index(['Internal1', 'Internal2'], dtype='object')

In [37]:
final.values                     # returns the data as a 2-D NumPy array (one inner array per row)

array([[35, 32],
       [35, 33],
       [35, 34]], dtype=int64)

In [40]:
# Update a single cell through the DataFrame's own positional indexer.
# Assigning into final.values is unreliable: .values may hand back a copy,
# in which case the DataFrame itself is left unchanged.
final.iloc[2, 0] = 99            # row position 2 ('s3'), column position 0 ('Internal1')
final.values

array([[35, 32],
       [35, 33],
       [99, 34]], dtype=int64)

In [43]:
# Walk the DataFrame row by row; each row unpacks into its two column values
for first_mark, second_mark in final.values:
    print('Internal1 - ', first_mark, 'Internal2 - ', second_mark)

Internal1 -  35 Internal2 -  32
Internal1 -  35 Internal2 -  33
Internal1 -  99 Internal2 -  34


In [54]:
final.loc['s4']=[20,10] # insert a new record (row labelled 's4') into the DataFrame
final

Unnamed: 0,Internal1,Internal2
s1,35,32
s2,35,33
s3,155,255
s4,20,10


In [49]:
# Replace the whole row at position 2 ('s3').
# Use .iloc rather than writing into final.values, which may be a copy
# and would then leave the DataFrame unchanged.
final.iloc[2] = [155, 255]
final

Unnamed: 0,Internal1,Internal2
s1,35,32
s2,35,33
s3,155,255
s4,20,10


In [51]:
pwd


'C:\\Users\\mohit.DESKTOP-3KV7B5E\\Desktop\\Problem solving programming'

In [59]:
filepath = 'datafile/Income.csv'
def readCsvData(filePath):
    """Read the CSV data at ``filePath`` and return it as a DataFrame.

    Parameters
    ----------
    filePath : str, path object or file-like object
        Location of the CSV data; passed straight to ``pd.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The parsed contents of the CSV.
    """
    # Read the argument, not the module-level `filepath`, so the caller
    # decides which file is loaded.
    return pd.read_csv(filePath)
df=readCsvData(filepath)
print(df)

       GEOID       State   2005   2006   2007   2008   2009   2010   2011  \
0  04000US01     Alabama  37150  37952  42212  44476  39980  40933  42590   
1  04000US02      Alaska  55891  56418  62993  63989  61604  57848  57431   
2  04000US04     Arizona  45245  46657  62993  46914  45739  46896  48621   
3  04000US05    Arkansas  36658  37057  40795  39586  36538  38587  41302   
4  04000US06  California  51755  55319  55734  57014  56134  54283  53367   

    2012   2013  
0  43464  41381  
1  63648  61137  
2  47044  50602  
3  39018  39919  
4  57020  57528  


In [63]:
# Extract the 2009 income of every state, e.g.
# Alabama : 39980
# .......
# Select the columns by name instead of by position (row[-5]) so the cell
# keeps working if columns are added or reordered.
for state, income in zip(df['State'], df['2009']):
    print(state, ' : ', income)

Alabama  :  39980
Alaska  :  61604
Arizona  :  45739
Arkansas  :  36538
California  :  56134


In [60]:
# show the raw contents of the DataFrame as a NumPy array (one inner array per row)
print(df.values)

[['04000US01' 'Alabama' 37150 37952 42212 44476 39980 40933 42590 43464
  41381]
 ['04000US02' 'Alaska' 55891 56418 62993 63989 61604 57848 57431 63648
  61137]
 ['04000US04' 'Arizona' 45245 46657 62993 46914 45739 46896 48621 47044
  50602]
 ['04000US05' 'Arkansas' 36658 37057 40795 39586 36538 38587 41302 39018
  39919]
 ['04000US06' 'California' 51755 55319 55734 57014 56134 54283 53367
  57020 57528]]


In [65]:
# Average income of California (row position 4) across all year columns
def avg(frame=None, row=4, first_col=2):
    """Return the floor of the mean of one row's values from ``first_col`` onwards.

    Called with no arguments it averages row position 4 of the module-level
    ``df`` over every column from position 2 onwards (in the income table
    those are the year columns that follow GEOID and State).

    Parameters
    ----------
    frame : pandas.DataFrame, optional
        Table to read from; defaults to the module-level ``df``.
    row : int, optional
        Positional index of the row to average (default 4).
    first_col : int, optional
        Positional index of the first column to include (default 2).

    Returns
    -------
    The sum of the selected values floor-divided by how many there are.

    Raises
    ------
    ValueError
        If the selection contains no columns.
    """
    if frame is None:
        frame = df  # fall back to the notebook-level DataFrame
    incomes = frame.values[row][first_col:]
    if len(incomes) == 0:
        raise ValueError("no columns to average from position %d onwards" % first_col)
    return sum(incomes) // len(incomes)
avg()

55350

In [66]:
# Function that returns the column names of a DataFrame as a list, e.g.
# GEOID State 2005   2006   2007   2008   2009   2010   2011   2012   2013
def printdataframecol(df):
    """Return the column labels of ``df`` as a plain Python list, in column order."""
    return [label for label in df.columns]
printdataframecol(df)

['GEOID',
 'State',
 '2005',
 '2006',
 '2007',
 '2008',
 '2009',
 '2010',
 '2011',
 '2012',
 '2013']

In [67]:
filepath = 'datafile/RegularSeasonCompactResults.csv'
def readCsvData(filePath):
    """Read the CSV data at ``filePath`` and return it as a DataFrame.

    Parameters
    ----------
    filePath : str, path object or file-like object
        Location of the CSV data; passed straight to ``pd.read_csv``.

    Returns
    -------
    pandas.DataFrame
        The parsed contents of the CSV.
    """
    # Read the argument, not the module-level `filepath`, so the caller
    # decides which file is loaded.
    return pd.read_csv(filePath)
df=readCsvData(filepath)
print(df)

        Season  Daynum  Wteam  Wscore  Lteam  Lscore Wloc  Numot
0         1985      20   1228      81   1328      64    N      0
1         1985      25   1106      77   1354      70    H      0
2         1985      25   1112      63   1223      56    H      0
3         1985      25   1165      70   1432      54    H      0
4         1985      25   1192      86   1447      74    H      0
5         1985      25   1218      79   1337      78    H      0
6         1985      25   1228      64   1226      44    N      0
7         1985      25   1242      58   1268      56    N      0
8         1985      25   1260      98   1133      80    H      0
9         1985      25   1305      97   1424      89    H      0
10        1985      25   1307     103   1288      71    H      0
11        1985      25   1344      75   1438      71    N      0
12        1985      25   1374      91   1411      72    H      0
13        1985      25   1412      70   1397      65    N      0
14        1985      25   

In [68]:
# .shape gives the dimensions of the DataFrame as a (rows, columns) tuple

df.shape  # (rows, columns)

(145289, 8)

In [69]:
# head() shows only the first few rows (five when called without an argument)
df.head()

Unnamed: 0,Season,Daynum,Wteam,Wscore,Lteam,Lscore,Wloc,Numot
0,1985,20,1228,81,1328,64,N,0
1,1985,25,1106,77,1354,70,H,0
2,1985,25,1112,63,1223,56,H,0
3,1985,25,1165,70,1432,54,H,0
4,1985,25,1192,86,1447,74,H,0


In [70]:
# tail() shows only the last few rows (five when called without an argument)
df.tail()


Unnamed: 0,Season,Daynum,Wteam,Wscore,Lteam,Lscore,Wloc,Numot
145284,2016,132,1114,70,1419,50,N,0
145285,2016,132,1163,72,1272,58,N,0
145286,2016,132,1246,82,1401,77,N,1
145287,2016,132,1277,66,1345,62,N,0
145288,2016,132,1386,87,1433,74,N,0


In [71]:
# convert the column labels into a plain Python list in one call
df.columns.tolist()

['Season', 'Daynum', 'Wteam', 'Wscore', 'Lteam', 'Lscore', 'Wloc', 'Numot']

In [73]:
# describe() summarises the numeric columns: count, mean, std, min, quartiles and max
df.describe()

Unnamed: 0,Season,Daynum,Wteam,Wscore,Lteam,Lscore,Numot
count,145289.0,145289.0,145289.0,145289.0,145289.0,145289.0,145289.0
mean,2001.574834,75.223816,1286.720646,76.600321,1282.864064,64.497009,0.044387
std,9.233342,33.287418,104.570275,12.173033,104.829234,11.380625,0.247819
min,1985.0,0.0,1101.0,34.0,1101.0,20.0,0.0
25%,1994.0,47.0,1198.0,68.0,1191.0,57.0,0.0
50%,2002.0,78.0,1284.0,76.0,1280.0,64.0,0.0
75%,2010.0,103.0,1379.0,84.0,1375.0,72.0,0.0
max,2016.0,132.0,1464.0,186.0,1464.0,150.0,6.0


In [74]:
df.max()    # maximum of every column

Season    2016
Daynum     132
Wteam     1464
Wscore     186
Lteam     1464
Lscore     150
Wloc         N
Numot        6
dtype: object

In [76]:
# maximum and minimum of a single column
print(df['Lteam'].max())
print(df['Lteam'].min())

1464
1101


In [77]:
df.loc[:3]      # label-based slice: the end label 3 is included, so rows 0-3 are returned

Unnamed: 0,Season,Daynum,Wteam,Wscore,Lteam,Lscore,Wloc,Numot
0,1985,20,1228,81,1328,64,N,0
1,1985,25,1106,77,1354,70,H,0
2,1985,25,1112,63,1223,56,H,0
3,1985,25,1165,70,1432,54,H,0


In [79]:
df.iloc[:3]   # position-based slice: the end position 3 is excluded, so rows 0-2 are returned

Unnamed: 0,Season,Daynum,Wteam,Wscore,Lteam,Lscore,Wloc,Numot
0,1985,20,1228,81,1328,64,N,0
1,1985,25,1106,77,1354,70,H,0
2,1985,25,1112,63,1223,56,H,0


In [82]:
# sort by Lscore (ascending) and show the five rows with the lowest values
df.sort_values('Lscore').head()

Unnamed: 0,Season,Daynum,Wteam,Wscore,Lteam,Lscore,Wloc,Numot
100027,2008,66,1203,49,1387,20,H,0
49310,1997,66,1157,61,1204,21,H,0
89021,2006,44,1284,41,1343,21,A,0
85042,2005,66,1131,73,1216,22,H,0
103660,2009,26,1326,59,1359,22,H,0


In [83]:
# sort by Lscore (ascending) and show the five rows with the highest values
df.sort_values('Lscore').tail()

Unnamed: 0,Season,Daynum,Wteam,Wscore,Lteam,Lscore,Wloc,Numot
77873,2003,110,1383,142,1254,140,H,2
24970,1991,68,1258,186,1109,140,H,0
22074,1990,96,1261,148,1258,141,H,0
16853,1989,68,1258,162,1109,144,A,0
17867,1989,92,1258,181,1109,150,H,0


In [88]:
# filter the records with a boolean condition: only rows where it is True are kept
df[df['Wscore'] > 150]      # Wscore greater than 150

Unnamed: 0,Season,Daynum,Wteam,Wscore,Lteam,Lscore,Wloc,Numot
5269,1986,75,1258,151,1109,107,H,0
12046,1988,40,1328,152,1147,84,H,0
12355,1988,52,1328,151,1173,99,N,0
16040,1989,40,1328,152,1331,122,H,0
16853,1989,68,1258,162,1109,144,A,0
17867,1989,92,1258,181,1109,150,H,0
19653,1990,30,1328,173,1109,101,H,0
19971,1990,38,1258,152,1109,137,A,0
20022,1990,40,1116,166,1109,101,H,0
22145,1990,97,1258,157,1362,115,H,0


In [89]:
# combine two conditions with & (each wrapped in parentheses): Wscore above 170 AND Daynum above 60
df[(df['Wscore']>170) & (df['Daynum']> 60)]

Unnamed: 0,Season,Daynum,Wteam,Wscore,Lteam,Lscore,Wloc,Numot
17867,1989,92,1258,181,1109,150,H,0
24970,1991,68,1258,186,1109,140,H,0
