In [1]:
import numpy as np
import pandas as pd

# Creating Series Objects

In [2]:
s = pd.Series([0, 1, 1, 2, 3, 5, 8])

In [3]:
print(s)

0    0
1    1
2    1
3    2
4    3
5    5
6    8
dtype: int64


In [4]:
s = pd.Series([0.0, 1, 1, 2, 3, 5, 8])

In [5]:
print(s)

0    0.0
1    1.0
2    1.0
3    2.0
4    3.0
5    5.0
6    8.0
dtype: float64


In [6]:
s.values

array([0., 1., 1., 2., 3., 5., 8.])

In [7]:
s.index

RangeIndex(start=0, stop=7, step=1)

In [8]:
for v in s.values:
    print(v)

0.0
1.0
1.0
2.0
3.0
5.0
8.0


In [9]:
for i in s.index:
    print(i)

0
1
2
3
4
5
6


In [10]:
for item in zip(s.index, s.values):
    print(item)

(0, 0.0)
(1, 1.0)
(2, 1.0)
(3, 2.0)
(4, 3.0)
(5, 5.0)
(6, 8.0)


In [11]:
s[0]

0.0

In [12]:
s[1]

1.0

In [13]:
s[5]

5.0

In [14]:
mercury = pd.Series([0.33, 57.9, 4222.6], index=['mass', 'diameter', 'dayLength'])

In [15]:
print(mercury)

mass            0.33
diameter       57.90
dayLength    4222.60
dtype: float64


In [16]:
mercury['mass']

0.33

In [17]:
mercury['dayLength']

4222.6

In [18]:
mercury.mass

0.33

In [19]:
arr = np.random.randint(0, 10, 10)

In [20]:
arr

array([8, 3, 6, 9, 1, 2, 7, 7, 4, 5])

In [21]:
ind = np.arange(10, 20)

In [22]:
rand_series = pd.Series(arr, index=ind)

In [23]:
print(rand_series)

10    8
11    3
12    6
13    9
14    1
15    2
16    7
17    7
18    4
19    5
dtype: int64


In [24]:
# mercury = pd.Series([0.33, 57.9, 4222.6], index=['mass', 'diameter', 'dayLength'])

d = {}
d['mass'] = 0.33
d['diameter'] = 57.9
d['dayLength'] = 4222.6


In [25]:
print(d)

{'mass': 0.33, 'diameter': 57.9, 'dayLength': 4222.6}


In [26]:
mercury = pd.Series(d)

In [27]:
print(mercury)

mass            0.33
diameter       57.90
dayLength    4222.60
dtype: float64


In [28]:
mercury = pd.Series(d, index=['mass', 'diameter', 'dayLength'])

In [29]:
print(mercury)

mass            0.33
diameter       57.90
dayLength    4222.60
dtype: float64


In [30]:
mercury = pd.Series(d, index=['mass', 'diameter'])

In [31]:
print(mercury)

mass         0.33
diameter    57.90
dtype: float64


# iloc and loc

In [32]:
s = pd.Series([0.0, 1, 1, 2, 3, 5, 8], index=[1, 2, 3, 4, 5, 6, 7])

In [33]:
print(s)

1    0.0
2    1.0
3    1.0
4    2.0
5    3.0
6    5.0
7    8.0
dtype: float64


In [34]:
s.loc[4]

2.0

In [35]:
s.iloc[4]

3.0

In [36]:
s.iloc[0]

0.0

In [None]:
s.loc[0]

In [38]:
mercury = pd.Series(d, index=['mass', 'diameter', 'dayLength'])

In [39]:
mercury.loc['mass']

0.33

In [40]:
mercury.iloc[0]

0.33

In [41]:
mercury.iloc[-1]

4222.6

In [45]:
mercury.iloc[0:1]

mass    0.33
dtype: float64

In [48]:
mercury.loc[:'dayLength']

mass            0.33
diameter       57.90
dayLength    4222.60
dtype: float64

# Simple operations

In [49]:
mass = pd.Series([0.33, 4.87, 5.97, 0.642, 1898, 568, 86.8, 102, 0.0146], 
                 index=['Mercury', 'Venus', 'Earth', 'Mars', 'Jupiter', 'Saturn', 'Uranus', 'Neptune', 'Pluto'])

In [50]:
print(mass)

Mercury       0.3300
Venus         4.8700
Earth         5.9700
Mars          0.6420
Jupiter    1898.0000
Saturn      568.0000
Uranus       86.8000
Neptune     102.0000
Pluto         0.0146
dtype: float64


In [51]:
mass[1]

4.87

In [52]:
mass.iloc[1]

4.87

In [53]:
mass.loc['Earth']

5.97

In [54]:
mass['Earth']

5.97

In [55]:
mass['Earth': 'Jupiter']

Earth         5.970
Mars          0.642
Jupiter    1898.000
dtype: float64

In [56]:
mass[2:5]

Earth         5.970
Mars          0.642
Jupiter    1898.000
dtype: float64

In [57]:
mass.iloc[2:5]

Earth         5.970
Mars          0.642
Jupiter    1898.000
dtype: float64

In [58]:
mass > 100

Mercury    False
Venus      False
Earth      False
Mars       False
Jupiter     True
Saturn      True
Uranus     False
Neptune     True
Pluto      False
dtype: bool

In [59]:
mass[mass > 100]

Jupiter    1898.0
Saturn      568.0
Neptune     102.0
dtype: float64

In [61]:
mass[(mass > 100) & (mass < 600)]

Saturn     568.0
Neptune    102.0
dtype: float64

In [62]:
mass

Mercury       0.3300
Venus         4.8700
Earth         5.9700
Mars          0.6420
Jupiter    1898.0000
Saturn      568.0000
Uranus       86.8000
Neptune     102.0000
Pluto         0.0146
dtype: float64

In [63]:
mass * 2

Mercury       0.6600
Venus         9.7400
Earth        11.9400
Mars          1.2840
Jupiter    3796.0000
Saturn     1136.0000
Uranus      173.6000
Neptune     204.0000
Pluto         0.0292
dtype: float64

In [64]:
mass / 10

Mercury      0.03300
Venus        0.48700
Earth        0.59700
Mars         0.06420
Jupiter    189.80000
Saturn      56.80000
Uranus       8.68000
Neptune     10.20000
Pluto        0.00146
dtype: float64

In [65]:
np.mean(mass)

296.29184444444445

In [66]:
np.amin(mass)

0.0146

In [67]:
np.amax(mass)

1898.0

In [68]:
np.median(mass)

5.97

In [69]:
mass + mass

Mercury       0.6600
Venus         9.7400
Earth        11.9400
Mars          1.2840
Jupiter    3796.0000
Saturn     1136.0000
Uranus      173.6000
Neptune     204.0000
Pluto         0.0292
dtype: float64

In [70]:
mass - mass

Mercury    0.0
Venus      0.0
Earth      0.0
Mars       0.0
Jupiter    0.0
Saturn     0.0
Uranus     0.0
Neptune    0.0
Pluto      0.0
dtype: float64

In [71]:
big_mass = mass[mass > 100]

In [72]:
big_mass

Jupiter    1898.0
Saturn      568.0
Neptune     102.0
dtype: float64

In [73]:
mass

Mercury       0.3300
Venus         4.8700
Earth         5.9700
Mars          0.6420
Jupiter    1898.0000
Saturn      568.0000
Uranus       86.8000
Neptune     102.0000
Pluto         0.0146
dtype: float64

In [75]:
new_mass = mass + big_mass

In [76]:
print(new_mass)

Earth         NaN
Jupiter    3796.0
Mars          NaN
Mercury       NaN
Neptune     204.0
Pluto         NaN
Saturn     1136.0
Uranus        NaN
Venus         NaN
dtype: float64


In [77]:
pd.isnull(new_mass)

Earth       True
Jupiter    False
Mars        True
Mercury     True
Neptune    False
Pluto       True
Saturn     False
Uranus      True
Venus       True
dtype: bool

In [79]:
new_mass[~pd.isnull(new_mass)]

Jupiter    3796.0
Neptune     204.0
Saturn     1136.0
dtype: float64

In [80]:
mass

Mercury       0.3300
Venus         4.8700
Earth         5.9700
Mars          0.6420
Jupiter    1898.0000
Saturn      568.0000
Uranus       86.8000
Neptune     102.0000
Pluto         0.0146
dtype: float64

In [82]:
mass['Moon'] = 0.7346

In [83]:
mass

Mercury       0.3300
Venus         4.8700
Earth         5.9700
Mars          0.6420
Jupiter    1898.0000
Saturn      568.0000
Uranus       86.8000
Neptune     102.0000
Pluto         0.0146
Moon          0.7346
dtype: float64

In [84]:
mass.drop(['Pluto'])

Mercury       0.3300
Venus         4.8700
Earth         5.9700
Mars          0.6420
Jupiter    1898.0000
Saturn      568.0000
Uranus       86.8000
Neptune     102.0000
Moon          0.7346
dtype: float64

**Task 1**

Collect numbers for the diameters of these planets (heavenly bodies) and store it as a Series object. Then given these two Series objects mass and diameter, compute the density of each planet.

In [85]:
diameter = pd.Series([4879, 12104, 12756, 3475, 6792, 142984, 120536, 51118, 49528, 2370], 
                     index=['Mercury', 'Venus', 'Earth', 'Moon', 'Mars', 'Jupiter', 'Saturn', 'Uranus', 'Neptune', 'Pluto'])

In [None]:
density = pd.Series([])

In [87]:
print(density)

Series([], dtype: float64)


In [88]:
mass

Mercury       0.3300
Venus         4.8700
Earth         5.9700
Mars          0.6420
Jupiter    1898.0000
Saturn      568.0000
Uranus       86.8000
Neptune     102.0000
Pluto         0.0146
Moon          0.7346
dtype: float64

In [89]:
diameter

Mercury      4879
Venus       12104
Earth       12756
Moon         3475
Mars         6792
Jupiter    142984
Saturn     120536
Uranus      51118
Neptune     49528
Pluto        2370
dtype: int64

In [91]:
for planet in mass.index:
    density[planet] = mass[planet] / (np.pi * diameter[planet] * diameter[planet] * diameter[planet] / 6)

In [92]:
print(density)

Mercury    5.426538e-12
Venus      5.244977e-12
Earth      5.493286e-12
Mars       3.913302e-12
Jupiter    1.240039e-12
Saturn     6.194402e-13
Uranus     1.241079e-12
Neptune    1.603427e-12
Pluto      2.094639e-12
Moon       3.343396e-11
dtype: float64


In [93]:
density = mass / (np.pi * np.power(diameter, 3) / 6)

In [94]:
density

Earth      5.493286e-12
Jupiter    1.240039e-12
Mars       3.913302e-12
Mercury    5.426538e-12
Moon       3.343396e-11
Neptune    1.603427e-12
Pluto      2.094639e-12
Saturn     6.194402e-13
Uranus     1.241079e-12
Venus      5.244977e-12
dtype: float64

In [101]:
mass['PlanetX'] = 6

In [102]:
density = mass / (np.pi * np.power(diameter, 3) / 6)

In [103]:
density

Earth      5.493286e-12
Jupiter    1.240039e-12
Mars       3.913302e-12
Mercury    5.426538e-12
Moon       3.343396e-11
Neptune    1.603427e-12
PlanetX             NaN
Pluto      2.094639e-12
Saturn     6.194402e-13
Uranus     1.241079e-12
Venus      5.244977e-12
dtype: float64

**Task 2**

Given this density Series, replace all values which NaNs with the mean density of all planets

In [98]:
density_mean = np.mean(density)

for key in density.index:
    if pd.isnull(density[key]):
        density[key] = density_mean

In [99]:
print(density)

Earth      5.493286e-12
Jupiter    1.240039e-12
Mars       3.913302e-12
Mercury    5.426538e-12
Moon       3.343396e-11
Neptune    1.603427e-12
PlanetX    6.031069e-12
Pluto      2.094639e-12
Saturn     6.194402e-13
Uranus     1.241079e-12
Venus      5.244977e-12
dtype: float64


In [105]:
density[pd.isnull(density)] = np.mean(density)

In [106]:
print(density)

Earth      5.493286e-12
Jupiter    1.240039e-12
Mars       3.913302e-12
Mercury    5.426538e-12
Moon       3.343396e-11
Neptune    1.603427e-12
PlanetX    6.031069e-12
Pluto      2.094639e-12
Saturn     6.194402e-13
Uranus     1.241079e-12
Venus      5.244977e-12
dtype: float64


**Task 3**

Compare Dictionary with Series: 
- checking if some key is present
- summing values
- computing std

In [115]:
my_dict = {}
N = 1000000
for i in range(N):
    my_dict[i] = i%10

In [116]:
my_series = pd.Series(my_dict)

In [117]:
M = 10000

In [118]:
arr = np.random.randint(0, N, M)

In [119]:
%%timeit
for i in arr:
    i in my_dict

100 loops, best of 3: 6.73 ms per loop


In [120]:
%%timeit
for i in arr:
    i in my_series

The slowest run took 5.43 times longer than the fastest. This could mean that an intermediate result is being cached.
100 loops, best of 3: 11.2 ms per loop


In [121]:
%%timeit
sum(my_dict.values())

100 loops, best of 3: 7.52 ms per loop


In [123]:
%%timeit
np.sum(my_series)

1000 loops, best of 3: 821 µs per loop


In [124]:
%%timeit
mean = sum(my_dict.values()) / N
variance = sum([(x - mean)**2 for x in my_dict.values()])
std = variance ** 0.5

10 loops, best of 3: 157 ms per loop


In [125]:
%%timeit
np.std(my_series)

100 loops, best of 3: 3.41 ms per loop


# NIFTY case-study

In [130]:
nifty = pd.read_csv('nifty.csv', index_col=0).iloc[:, 0]

In [131]:
nifty

Date
01-Jan-2019    10910.10
02-Jan-2019    10792.50
03-Jan-2019    10672.25
04-Jan-2019    10727.35
07-Jan-2019    10771.80
                 ...   
24-Dec-2019    12214.55
26-Dec-2019    12126.55
27-Dec-2019    12245.80
30-Dec-2019    12255.85
31-Dec-2019    12168.45
Name: Close, Length: 245, dtype: float64

In [132]:
nifty.head(25)

Date
01-Jan-2019    10910.10
02-Jan-2019    10792.50
03-Jan-2019    10672.25
04-Jan-2019    10727.35
07-Jan-2019    10771.80
08-Jan-2019    10802.15
09-Jan-2019    10855.15
10-Jan-2019    10821.60
11-Jan-2019    10794.95
14-Jan-2019    10737.60
15-Jan-2019    10886.80
16-Jan-2019    10890.30
17-Jan-2019    10905.20
18-Jan-2019    10906.95
21-Jan-2019    10961.85
22-Jan-2019    10922.75
23-Jan-2019    10831.50
24-Jan-2019    10849.80
25-Jan-2019    10780.55
28-Jan-2019    10661.55
29-Jan-2019    10652.20
30-Jan-2019    10651.80
31-Jan-2019    10830.95
01-Feb-2019    10893.65
04-Feb-2019    10912.25
Name: Close, dtype: float64

In [133]:
nifty.tail(25)

Date
26-Nov-2019    12037.70
27-Nov-2019    12100.70
28-Nov-2019    12151.15
29-Nov-2019    12056.05
02-Dec-2019    12048.20
03-Dec-2019    11994.20
04-Dec-2019    12043.20
05-Dec-2019    12018.40
06-Dec-2019    11921.50
09-Dec-2019    11937.50
10-Dec-2019    11856.80
11-Dec-2019    11910.15
12-Dec-2019    11971.80
13-Dec-2019    12086.70
16-Dec-2019    12053.95
17-Dec-2019    12165.00
18-Dec-2019    12221.65
19-Dec-2019    12259.70
20-Dec-2019    12271.80
23-Dec-2019    12262.75
24-Dec-2019    12214.55
26-Dec-2019    12126.55
27-Dec-2019    12245.80
30-Dec-2019    12255.85
31-Dec-2019    12168.45
Name: Close, dtype: float64

In [134]:
np.mean(nifty)

11432.632244897959

In [135]:
np.median(nifty)

11512.4

In [136]:
np.std(nifty)

453.2866947459807

What fraction of days did the markets close higher than the previous day's close

In [137]:
nifty[0]

10910.1

In [138]:
nifty[1]

10792.5

In [139]:
nifty[1] - nifty[0]

-117.60000000000036

In [140]:
nifty[1:]

Date
02-Jan-2019    10792.50
03-Jan-2019    10672.25
04-Jan-2019    10727.35
07-Jan-2019    10771.80
08-Jan-2019    10802.15
                 ...   
24-Dec-2019    12214.55
26-Dec-2019    12126.55
27-Dec-2019    12245.80
30-Dec-2019    12255.85
31-Dec-2019    12168.45
Name: Close, Length: 244, dtype: float64

In [141]:
nifty[:-1]

Date
01-Jan-2019    10910.10
02-Jan-2019    10792.50
03-Jan-2019    10672.25
04-Jan-2019    10727.35
07-Jan-2019    10771.80
                 ...   
23-Dec-2019    12262.75
24-Dec-2019    12214.55
26-Dec-2019    12126.55
27-Dec-2019    12245.80
30-Dec-2019    12255.85
Name: Close, Length: 244, dtype: float64

In [142]:
nifty[1:] - nifty[:-1]

Date
01-Apr-2019    0.0
01-Aug-2019    0.0
01-Feb-2019    0.0
01-Jan-2019    NaN
01-Jul-2019    0.0
              ... 
31-Dec-2019    NaN
31-Jan-2019    0.0
31-Jul-2019    0.0
31-May-2019    0.0
31-Oct-2019    0.0
Name: Close, Length: 245, dtype: float64

In [147]:
np.sum((nifty.values[1:] - nifty.values[:-1]) > 0) / len(nifty)

0.5265306122448979

**Tasks**

1. Compute moving average of the last 5 days

2. Subset the data to include only data for Fridays

In [155]:
nifty

Date
01-Jan-2019    10910.10
02-Jan-2019    10792.50
03-Jan-2019    10672.25
04-Jan-2019    10727.35
07-Jan-2019    10771.80
                 ...   
24-Dec-2019    12214.55
26-Dec-2019    12126.55
27-Dec-2019    12245.80
30-Dec-2019    12255.85
31-Dec-2019    12168.45
Name: Close, Length: 245, dtype: float64

In [156]:
nifty.index[0]

'01-Jan-2019'

In [158]:
d = pd.Timestamp(nifty.index[0])

In [159]:
d.dayofweek

1

In [160]:
new_index = map(pd.Timestamp, nifty.index)

In [161]:
new_nifty = pd.Series(nifty, index = new_index)

In [162]:
new_nifty

2019-01-01    10910.10
2019-01-02    10792.50
2019-01-03    10672.25
2019-01-04    10727.35
2019-01-07    10771.80
                ...   
2019-12-24    12214.55
2019-12-26    12126.55
2019-12-27    12245.80
2019-12-30    12255.85
2019-12-31    12168.45
Name: Close, Length: 245, dtype: float64

In [163]:
new_nifty.index[0]

Timestamp('2019-01-01 00:00:00')

In [164]:
new_nifty.rolling('5d').mean()

2019-01-01    10910.100000
2019-01-02    10851.300000
2019-01-03    10791.616667
2019-01-04    10775.550000
2019-01-07    10723.800000
                  ...     
2019-12-24    12249.700000
2019-12-26    12201.283333
2019-12-27    12212.412500
2019-12-30    12209.400000
2019-12-31    12223.366667
Name: Close, Length: 245, dtype: float64

In [167]:
dow = new_nifty.copy()
for i in dow.index:
    dow[i] = i.dayofweek

In [168]:
dow

2019-01-01    1.0
2019-01-02    2.0
2019-01-03    3.0
2019-01-04    4.0
2019-01-07    0.0
             ... 
2019-12-24    1.0
2019-12-26    3.0
2019-12-27    4.0
2019-12-30    0.0
2019-12-31    1.0
Name: Close, Length: 245, dtype: float64

In [169]:
new_nifty[dow == 4]

2019-01-04    10727.35
2019-01-11    10794.95
2019-01-18    10906.95
2019-01-25    10780.55
2019-02-01    10893.65
2019-02-08    10943.60
2019-02-15    10724.40
2019-02-22    10791.65
2019-03-01    10863.50
2019-03-08    11035.40
2019-03-15    11426.85
2019-03-22    11456.90
2019-03-29    11623.90
2019-04-05    11665.95
2019-04-12    11643.45
2019-04-26    11754.65
2019-05-03    11712.25
2019-05-10    11278.90
2019-05-17    11407.15
2019-05-24    11844.10
2019-05-31    11922.80
2019-06-07    11870.65
2019-06-14    11823.30
2019-06-21    11724.10
2019-06-28    11788.85
2019-07-05    11811.15
2019-07-12    11552.50
2019-07-19    11419.25
2019-07-26    11284.30
2019-08-02    10997.35
2019-08-09    11109.65
2019-08-16    11047.80
2019-08-23    10829.35
2019-08-30    11023.25
2019-09-06    10946.20
2019-09-13    11075.90
2019-09-20    11274.20
2019-09-27    11512.40
2019-10-04    11174.75
2019-10-11    11305.05
2019-10-18    11661.85
2019-10-25    11583.90
2019-11-01    11890.60
2019-11-08 