## Pandas Library

In [1]:
import pandas as pd
import numpy as np
# dir(pd)

### Types
- series: 1-D array
- Dataframes: It is a format of rows and columns

### Series:
- pd.Series

In [2]:
# help(pd.Series)

### Different ways to create Series
- List
- Tuples
- Dictionary
- Numpy array

In [3]:
s1 = pd.Series([1,2,3,4,5,6])
s1

0    1
1    2
2    3
3    4
4    5
5    6
dtype: int64

In [4]:
s2 = pd.Series((1,2,3,4,5,6))
s2

0    1
1    2
2    3
3    4
4    5
5    6
dtype: int64

In [5]:
d={1:2,2:"sandeep",3:"Martur"}
d

{1: 2, 2: 'sandeep', 3: 'Martur'}

In [6]:
s3 = pd.Series(d)
s3

1          2
2    sandeep
3     Martur
dtype: object

In [7]:
a =np.array([1,2,3,4]) 
s4 = pd.Series(a)
s4

0    1
1    2
2    3
3    4
dtype: int32

### Operations b/w Series(+,-,*,...)

In [8]:
s5 = pd.Series([1,2,3,4,5])
s6 = pd.Series([6,7,8,9,10])
s5 + s6

0     7
1     9
2    11
3    13
4    15
dtype: int64

In [9]:
s5 * s6

0     6
1    14
2    24
3    36
4    50
dtype: int64

In [10]:
s5 / s6

0    0.166667
1    0.285714
2    0.375000
3    0.444444
4    0.500000
dtype: float64

In [11]:
s5 %s6

0    1
1    2
2    3
3    4
4    5
dtype: int64

In [12]:
s5 ** s6

0          1
1        128
2       6561
3     262144
4    9765625
dtype: int64

In [13]:
# Define user defined index for Series
s7 = pd.Series([1,2,3,4],index=['a','b','c','d'])
s7

a    1
b    2
c    3
d    4
dtype: int64

# data frames
- pd.DataFrame()

In [14]:
# help(pd.DataFrame)
d = {'srh':["kane","warner","bairstow"],'csk':['dhoni','raina','pujara']}
df1 = pd.DataFrame(d,index=[1,2,3])
df1

Unnamed: 0,srh,csk
1,kane,dhoni
2,warner,raina
3,bairstow,pujara


### Indexing 
- selecting rows
    - df[stat_val:stop_val]
- selecting columns
    - df[col_name] or df.col_name
- selecitng multiple columns
    - df[['col_name','col_name2']]

In [15]:
df1[0:2]

Unnamed: 0,srh,csk
1,kane,dhoni
2,warner,raina


In [16]:
df1['srh']
# df1.srh

1        kane
2      warner
3    bairstow
Name: srh, dtype: object

In [17]:
df1['srh'][2]

'warner'

In [18]:
df1[['srh','csk']]

Unnamed: 0,srh,csk
1,kane,dhoni
2,warner,raina
3,bairstow,pujara


In [19]:
df1[2]

KeyError: 2

### iloc and loc
- iloc- postion based index
- loc- label based index

In [None]:
df1.iloc[0]

In [None]:
type(df1.iloc[2])

In [None]:
df1.loc[2]

In [None]:
df1.iloc[0:2,1]

In [None]:
df1.loc[1:2,['srh','csk']]

In [None]:
d = {"sports":['cric','volley','shuttle','kabadi'],'score':[9,7,7,7],'teams':['ind','aus','eng','nz']}
d1 = {"sports":['football','basketball','athletic','highjump'],'score':[9,7,7,7],'teams':['brazil','usa','russia','scottland']}
df1 = pd.DataFrame(d,index=['A','B','C','D'])
df2 = pd.DataFrame(d1,index=['A','B','C','D'])

In [None]:
df1

In [None]:
df2

### combine df1 and df2
- pd.concat()

In [None]:
pd.concat([df1,df2]) #row combination

In [None]:
pd.concat([df1,df2],axis =1)

### adding row and column

In [None]:
df1.append({'sports':'Longjump','score':6,'teams':'germany'},ignore_index=True)

In [None]:
df1['home']=['delhi','syndey','london','melborne']
df1

## getting and loading dataset

In [None]:
df = pd.read_csv('market_fact.csv')
df.head()

In [None]:
df.tail()

In [None]:
df.sample()

In [None]:
df.sample(5)

In [None]:
df.columns

In [None]:
df.index

In [20]:
df2 = pd.read_csv('https://raw.githubusercontent.com/nagamounika5/Datasets/master/Global%20Dataset/Market_Fact.csv')
df2

Unnamed: 0,Ord_id,Prod_id,Ship_id,Cust_id,Sales,Discount,Order_Quantity,Profit,Shipping_Cost,Product_Base_Margin
0,Ord_5446,Prod_16,SHP_7609,Cust_1818,136.8100,0.01,23,-30.51,3.60,0.56
1,Ord_5406,Prod_13,SHP_7549,Cust_1818,42.2700,0.01,13,4.56,0.93,0.54
2,Ord_5446,Prod_4,SHP_7610,Cust_1818,4701.6900,0.00,26,1148.90,2.50,0.59
3,Ord_5456,Prod_6,SHP_7625,Cust_1818,2337.8900,0.09,43,729.34,14.30,0.37
4,Ord_5485,Prod_17,SHP_7664,Cust_1818,4233.1500,0.08,35,1219.87,26.30,0.38
...,...,...,...,...,...,...,...,...,...,...
8394,Ord_5353,Prod_4,SHP_7479,Cust_1798,2841.4395,0.08,28,374.63,7.69,0.59
8395,Ord_5411,Prod_6,SHP_7555,Cust_1798,127.1600,0.10,20,-74.03,6.92,0.37
8396,Ord_5388,Prod_6,SHP_7524,Cust_1798,243.0500,0.02,39,-70.85,5.35,0.40
8397,Ord_5348,Prod_15,SHP_7469,Cust_1798,3872.8700,0.03,23,565.34,30.00,0.62
