### Run this first

In [192]:
import numpy as np
import pandas as pd

## Creating Series Objects

In [9]:
s = pd.Series([0,1,1,2,3,5,8])
f = pd.Series([0.0,1,1,2,3,5,8])
print(s)
print(type(s))
print(f)
print(type(f))

0    0
1    1
2    1
3    2
4    3
5    5
6    8
dtype: int64
<class 'pandas.core.series.Series'>
0    0.0
1    1.0
2    1.0
3    2.0
4    3.0
5    5.0
6    8.0
dtype: float64
<class 'pandas.core.series.Series'>


In [26]:
print(s.values)
print(s.keys())
print(s.index)
for i in s.values:
    print(i)
for i in s.index:
    print(i)

[0 1 1 2 3 5 8]
RangeIndex(start=0, stop=7, step=1)
RangeIndex(start=0, stop=7, step=1)
0
1
1
2
3
5
8
0
1
2
3
4
5
6


In [32]:
for i in zip(s.index,s.values):
    print(i)

(0, 0)
(1, 1)
(2, 1)
(3, 2)
(4, 3)
(5, 5)
(6, 8)


In [36]:
print(s[0])
print(s[6])

0
8


In [65]:
mercury = pd.Series([0.33,57.9,4222.6], index = ['mass','diameter','dayLength'])
print(mercury)
print(mercury['diameter'])
print(mercury['dayLength'])
# do not use below methods
print(mercury.diameter)
print(mercury.dayLength)

mass            0.33
diameter       57.90
dayLength    4222.60
dtype: float64
57.9
4222.6
57.9
4222.6


In [66]:
arr = np.random.randint(0,10,10)
index = np.arange(10,20)
rand_series = pd.Series(arr,index = index)
print(rand_series)

10    7
11    2
12    1
13    1
14    4
15    8
16    7
17    5
18    6
19    4
dtype: int32


In [67]:
d = {}
d['mass'] = 0.33
d['diameter'] = 57.9
d['dayLength'] = 4222.6

mercury = pd.Series(d)
print(mercury)

mass            0.33
diameter       57.90
dayLength    4222.60
dtype: float64


In [68]:
mercury = pd.Series(d,index = ['mass','diameter','dayLength'])
print(mercury)

mass            0.33
diameter       57.90
dayLength    4222.60
dtype: float64


In [70]:
# Only those indices which are included below in the index are taken instead of the entire dictionary
mercury = pd.Series(d,index = ['mass','diameter'])
print(mercury)

mass         0.33
diameter    57.90
dtype: float64


### iLoc & Loc

In [93]:
s = pd.Series([0.0,1,1,2,3,5,8],index=[1,2,3,4,5,6,7])
print(s)
# gives value at specified location - location
print(s.loc[4])
# gives value at implicit location - integer location
print(s.iloc[4])

1    0.0
2    1.0
3    1.0
4    2.0
5    3.0
6    5.0
7    8.0
dtype: float64
2.0
3.0


In [106]:
mercury = pd.Series([0.33,57.9,4222.6], index = ['mass','diameter','dayLength'])
print(mercury.loc['mass'])
print(mercury.iloc[-1])
print(mercury.iloc[0])
print(mercury.iloc[1:]) # is a new series
print(mercury.iloc[1:][1])
# slicing can be done to both loc and iloc
print(mercury.loc['diameter':])
print(mercury.loc['diameter':]['dayLength'])

0.33
4222.6
0.33
diameter       57.9
dayLength    4222.6
dtype: float64
4222.6
diameter       57.9
dayLength    4222.6
dtype: float64
4222.6


### Simple Operations

In [3]:
mass = pd.Series([0.33,4.87,5.97,0.642,1898,568,86.8,102,0.0146], index = ['Mercury','Venus','Earth','Mars','Jupiter','Saturn','Uranus','Neptune','Pluto'])
print(mass)
print(mass.iloc[1])
print(mass.iloc[2:5])
print(mass.loc['Earth':'Jupiter'])
print(mass.loc['Earth':'Jupiter']['Mars'])

Mercury       0.3300
Venus         4.8700
Earth         5.9700
Mars          0.6420
Jupiter    1898.0000
Saturn      568.0000
Uranus       86.8000
Neptune     102.0000
Pluto         0.0146
dtype: float64
4.87
Earth         5.970
Mars          0.642
Jupiter    1898.000
dtype: float64
Earth         5.970
Mars          0.642
Jupiter    1898.000
dtype: float64
0.642


In [121]:
print(mass > 100)
print(mass[mass > 100])

Mercury    False
Venus      False
Earth      False
Mars       False
Jupiter     True
Saturn      True
Uranus     False
Neptune     True
Pluto      False
dtype: bool
Jupiter    1898.0
Saturn      568.0
Neptune     102.0
dtype: float64


In [130]:
print(mass[ (mass > 3) & (mass < 150)])

Venus        4.87
Earth        5.97
Uranus      86.80
Neptune    102.00
dtype: float64


In [131]:
print(mass*2)
print(mass/10)

Mercury       0.6600
Venus         9.7400
Earth        11.9400
Mars          1.2840
Jupiter    3796.0000
Saturn     1136.0000
Uranus      173.6000
Neptune     204.0000
Pluto         0.0292
dtype: float64
Mercury      0.03300
Venus        0.48700
Earth        0.59700
Mars         0.06420
Jupiter    189.80000
Saturn      56.80000
Uranus       8.68000
Neptune     10.20000
Pluto        0.00146
dtype: float64


In [133]:
print(np.mean(mass))
print(np.median(mass))
print(np.std(mass))

296.29184444444445
5.97
591.8827216151058


In [134]:
print(np.amin(mass))
print(np.amax(mass))

0.0146
1898.0


In [137]:
print(mass*mass)
print(mass + mass)

Mercury    1.089000e-01
Venus      2.371690e+01
Earth      3.564090e+01
Mars       4.121640e-01
Jupiter    3.602404e+06
Saturn     3.226240e+05
Uranus     7.534240e+03
Neptune    1.040400e+04
Pluto      2.131600e-04
dtype: float64
Mercury       0.6600
Venus         9.7400
Earth        11.9400
Mars          1.2840
Jupiter    3796.0000
Saturn     1136.0000
Uranus      173.6000
Neptune     204.0000
Pluto         0.0292
dtype: float64


In [138]:
print(mass - mass)

Mercury    0.0
Venus      0.0
Earth      0.0
Mars       0.0
Jupiter    0.0
Saturn     0.0
Uranus     0.0
Neptune    0.0
Pluto      0.0
dtype: float64


In [147]:
big_mass = mass[mass > 100]
big_mass

Jupiter    1898.0
Saturn      568.0
Neptune     102.0
dtype: float64

In [148]:
# Does not throw error even if two objects are different dimension
new_mass = mass + big_mass
print(new_mass)

Earth         NaN
Jupiter    3796.0
Mars          NaN
Mercury       NaN
Neptune     204.0
Pluto         NaN
Saturn     1136.0
Uranus        NaN
Venus         NaN
dtype: float64


In [150]:
print(pd.isnull(new_mass))

Earth       True
Jupiter    False
Mars        True
Mercury     True
Neptune    False
Pluto       True
Saturn     False
Uranus      True
Venus       True
dtype: bool


In [154]:
new_mass[~pd.isnull(new_mass)]

Jupiter    3796.0
Neptune     204.0
Saturn     1136.0
dtype: float64

#### Adding element to series object

In [4]:
mass['Moon'] = 0.7346
mass['PlanetX'] = 6
print(mass)

Mercury       0.3300
Venus         4.8700
Earth         5.9700
Mars          0.6420
Jupiter    1898.0000
Saturn      568.0000
Uranus       86.8000
Neptune     102.0000
Pluto         0.0146
Moon          0.7346
PlanetX       6.0000
dtype: float64


In [158]:
mass.drop('Pluto') # mass.drop(['Pluto'])

Mercury       0.3300
Venus         4.8700
Earth         5.9700
Mars          0.6420
Jupiter    1898.0000
Saturn      568.0000
Uranus       86.8000
Neptune     102.0000
Moon          0.7346
dtype: float64

#### Collect numbers for diameters for these planets and store it as a series object. Then given these two series object mass and diameter , compute the density of each planet

In [5]:
diameter = pd.Series([4879, 12104, 12756, 3475, 6792, 142984, 120536, 51118, 49528, 2370], 
                     index=['Mercury', 'Venus', 'Earth', 'Moon', 'Mars', 'Jupiter', 'Saturn', 'Uranus', 'Neptune', 'Pluto'])

In [6]:
density = mass / (np.pi*(diameter**3)/6)
density

Earth      5.493286e-12
Jupiter    1.240039e-12
Mars       3.913302e-12
Mercury    5.426538e-12
Moon       3.343396e-11
Neptune    1.603427e-12
PlanetX             NaN
Pluto      2.094639e-12
Saturn     6.194402e-13
Uranus     1.241079e-12
Venus      5.244977e-12
dtype: float64

#### Given this density Series, replace all values which NaNs with the mean density of all planets

In [7]:
print(density.fillna(np.mean(density))) # density[pd.isnull(density)] = np.mean(density)

Earth      5.493286e-12
Jupiter    1.240039e-12
Mars       3.913302e-12
Mercury    5.426538e-12
Moon       3.343396e-11
Neptune    1.603427e-12
PlanetX    6.031069e-12
Pluto      2.094639e-12
Saturn     6.194402e-13
Uranus     1.241079e-12
Venus      5.244977e-12
dtype: float64


#### Compare Dictionary with Series: 
- checking if some key is present
- summing values
- computing std

In [4]:
my_dict = {}
N = 1000000
for i in range(N):
    my_dict[i] = i%10

In [5]:
my_series = pd.Series(my_dict)

In [6]:
M = 10000

In [7]:
arr = np.random.randint(0, N, M)

In [8]:
%%timeit
for i in arr:
    i in my_dict

3.4 ms ± 640 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [9]:
%%timeit
for i in arr:
    i in my_series

7.16 ms ± 39.4 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [10]:
%%timeit
sum(my_dict.values())

10.9 ms ± 24.5 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


In [11]:
%%timeit
np.sum(my_series)

444 µs ± 5.78 µs per loop (mean ± std. dev. of 7 runs, 1000 loops each)


In [12]:
%%timeit
mean = sum(my_dict.values()) / N
variance = sum([(x - mean)**2 for x in my_dict.values()])
std = variance ** 0.5

226 ms ± 4.6 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)


In [13]:
%%timeit
np.std(my_series)

2.1 ms ± 121 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)


## Case Study Nifty

In [245]:
nifty = pd.read_csv('nifty-200623-235920.csv',index_col=0).iloc[:,0]
print(nifty)
print(type(nifty))

Date
01-Jan-2019    10910.10
02-Jan-2019    10792.50
03-Jan-2019    10672.25
04-Jan-2019    10727.35
07-Jan-2019    10771.80
                 ...   
24-Dec-2019    12214.55
26-Dec-2019    12126.55
27-Dec-2019    12245.80
30-Dec-2019    12255.85
31-Dec-2019    12168.45
Name: Close, Length: 245, dtype: float64
<class 'pandas.core.series.Series'>


In [103]:
nifty.head(5)

Date
01-Jan-2019    10910.10
02-Jan-2019    10792.50
03-Jan-2019    10672.25
04-Jan-2019    10727.35
07-Jan-2019    10771.80
Name: Close, dtype: float64

In [104]:
nifty.tail(5)

Date
24-Dec-2019    12214.55
26-Dec-2019    12126.55
27-Dec-2019    12245.80
30-Dec-2019    12255.85
31-Dec-2019    12168.45
Name: Close, dtype: float64

In [105]:
print(np.mean(nifty))
print(np.median(nifty))
print(np.std(nifty))

11432.632244897959
11512.4
453.2866947459807


### Task 1 : What fraction of days did the market close higher than the previous day

In [140]:
new_nifty = nifty.shift()
print(nifty[nifty > new_nifty].size/nifty.size)

0.5265306122448979


In [153]:
nifty[1:] - nifty[:-1]

Date
01-Apr-2019    0.0
01-Aug-2019    0.0
01-Feb-2019    0.0
01-Jan-2019    NaN
01-Jul-2019    0.0
              ... 
31-Dec-2019    NaN
31-Jan-2019    0.0
31-Jul-2019    0.0
31-May-2019    0.0
31-Oct-2019    0.0
Name: Close, Length: 245, dtype: float64

### The above happend because it subtracted with the same index columns

In [162]:
np.sum((nifty.values[1:] > nifty.values[:-1]) > 0)/nifty.shape[0]

0.5265306122448979

### Task 2 : Compute moving average of the last five days

In [246]:
new_index = map(pd.Timestamp,nifty.index)
new_nifty = pd.Series(nifty,index=new_index)

In [261]:
# Create window with function rolling and do the computation
new_nifty.rolling('5d').mean()

2019-01-01    10910.100000
2019-01-02    10851.300000
2019-01-03    10791.616667
2019-01-04    10775.550000
2019-01-07    10723.800000
                  ...     
2019-12-24    12249.700000
2019-12-26    12201.283333
2019-12-27    12212.412500
2019-12-30    12209.400000
2019-12-31    12223.366667
Name: Close, Length: 245, dtype: float64

### Task 3 : Subset the data to include only the data for fridays

In [188]:
#nifty[nifty.index[3::5]]

In [263]:
d = pd.Timestamp(nifty.index[0])
d.dayofweek

1

In [265]:
#new_index = map(pd.Timestamp,nifty.index)
#new_nifty = pd.Series(nifty,index=new_index)
new_nifty[new_nifty.index.dayofweek == 4]

2019-01-04    10727.35
2019-01-11    10794.95
2019-01-18    10906.95
2019-01-25    10780.55
2019-02-01    10893.65
2019-02-08    10943.60
2019-02-15    10724.40
2019-02-22    10791.65
2019-03-01    10863.50
2019-03-08    11035.40
2019-03-15    11426.85
2019-03-22    11456.90
2019-03-29    11623.90
2019-04-05    11665.95
2019-04-12    11643.45
2019-04-26    11754.65
2019-05-03    11712.25
2019-05-10    11278.90
2019-05-17    11407.15
2019-05-24    11844.10
2019-05-31    11922.80
2019-06-07    11870.65
2019-06-14    11823.30
2019-06-21    11724.10
2019-06-28    11788.85
2019-07-05    11811.15
2019-07-12    11552.50
2019-07-19    11419.25
2019-07-26    11284.30
2019-08-02    10997.35
2019-08-09    11109.65
2019-08-16    11047.80
2019-08-23    10829.35
2019-08-30    11023.25
2019-09-06    10946.20
2019-09-13    11075.90
2019-09-20    11274.20
2019-09-27    11512.40
2019-10-04    11174.75
2019-10-11    11305.05
2019-10-18    11661.85
2019-10-25    11583.90
2019-11-01    11890.60
2019-11-08 