In [2]:
import numpy as np 
import pandas as pd 

In [3]:
pd.Series()

Series([], dtype: object)

### Creating A Series Object

In [3]:
pd.Series([1,2,3,4,5,6])

0    1
1    2
2    3
3    4
4    5
5    6
dtype: int64

In [5]:
pd.Series(data = [1,2,3,4,5,6])  # Both are same

0    1
1    2
2    3
3    4
4    5
5    6
dtype: int64

### Creating a Series

In [19]:
ice_cream = [

    'chocolate' ,
    'vanilla',
    'strawberry' , 
    'rum raisin'
]

days = ['monday' , 'wednesday' , 'friday' , 'saturday']

pd.Series(data = ice_cream , index = days)

monday        chocolate
wednesday       vanilla
friday       strawberry
saturday     rum raisin
dtype: object

In [16]:
bunch_of_bools = ['True' , 'False ', 'True']
pd.Series(bunch_of_bools)

0      True
1    False 
2      True
dtype: object

In [20]:
stock_price = [123.45 , 140.20]
time_of_day = ['open' , 'close']
pd.Series(data = stock_price , index = time_of_day)

open     123.45
close    140.20
dtype: float64

In [21]:
random_num = [10,20,40,56,66,78,1,22,5,33,2,4,6]
pd.Series(random_num)

0     10
1     20
2     40
3     56
4     66
5     78
6      1
7     22
8      5
9     33
10     2
11     4
12     6
dtype: int64

In [22]:
random_num = [10,20,40,56,66,78,1,22,5,33,2,4,6]
pd.Series(random_num , dtype = np.float64)

0     10.0
1     20.0
2     40.0
3     56.0
4     66.0
5     78.0
6      1.0
7     22.0
8      5.0
9     33.0
10     2.0
11     4.0
12     6.0
dtype: float64

### Missing Values

In [None]:
temperatures = [ 30.5 , 32.0 , np.nan , 31.2]    # np.nan = not a num ( missing value )
pd.Series(temperatures)

0    30.5
1    32.0
2     NaN
3    31.2
dtype: float64

#### Create a Series from Python Objects

In [29]:
calories = { 
              'biriyani' : 1000 , 
              'pizza' : 800, 
              'burger' : 500, 
              'pasta' : 600,
            }
diet = pd.Series(calories)
diet

biriyani    1000
pizza        800
burger       500
pasta        600
dtype: int64

In [30]:
diet.values

array([1000,  800,  500,  600])

In [34]:
diet.index

Index(['biriyani', 'pizza', 'burger', 'pasta'], dtype='object')

In [35]:
diet.size

4

In [36]:
diet.shape

(4,)

In [37]:
diet.dtype

dtype('int64')

In [None]:
diet.is_unique   # NO Duplicate Values

True

In [4]:
s1 = pd.Series([1, 2, 3, 3, 4])
s1.is_unique

False

In [None]:
s2 = pd.Series([1, 2, 3, 4])
s2.is_monotonic_increasing


True

In [6]:
s2 = pd.Series([1, 2, 3, 4])
s2.is_monotonic_decreasing

False

In [12]:
s3 = pd.Series([4, 3, 2, 1, 10, 25])
s3.head()

0     4
1     3
2     2
3     1
4    10
dtype: int64

In [None]:
s3.tail(n=3) # starts from 3

3     1
4    10
5    25
dtype: int64

### Statistical operations

In [13]:
numbers = pd.Series([1, 2, 3, np.nan, 4, 5])

In [15]:
print(numbers.count())

5


In [17]:
print(numbers.sum())

15.0


In [20]:
print(numbers.sum(skipna=False))

                                # skipna stands for “skip NaN values”.

                                # By default: skipna=True → ignores NaN values while summing.

                                # If you set skipna=False:

                                # If any NaN is present, the result will be NaN.

nan


In [25]:
print(numbers)

0    1.0
1    2.0
2    3.0
3    NaN
4    4.0
5    5.0
dtype: float64


In [None]:
numbers = pd.Series([1, 2, 3, np.nan, 4, 5])

print(numbers.sum(min_count=4))               # enough non-NaN values: 4 so adds all

print(numbers.sum(min_count=6))          # only 3 valid numbers, need 6 ----> so NAN         

<built-in function sum>
15.0
nan


In [31]:
print(numbers.product())
print(numbers.product(skipna=False))

print(numbers.product(min_count=3))
print(numbers.product(min_count=6))

120.0
nan
120.0
nan


In [33]:
# CUMMILATIVE SUM

print(numbers.cumsum())

print()

print(numbers.cumsum(skipna=False))

0     1.0
1     3.0
2     6.0
3     NaN
4    10.0
5    15.0
dtype: float64

0    1.0
1    3.0
2    6.0
3    NaN
4    NaN
5    NaN
dtype: float64


## Percentage Change

In [None]:
numbers = pd.Series([1, 2, 3, np.nan, 4, 5])

numbers.pct_change()

#  0         NaN
#  1    1.000000  (2-1)/1 = 1.0    
#  2    0.500000  (3-2)/2 = 0.5
#  3    0.000000  (3-3)/3 = 0.0
#  4    0.333333  (4-3)/3 = 0.333333
#  5    0.250000  (5-4)/4 = 0.25

  numbers.pct_change()


0         NaN
1    1.000000
2    0.500000
3    0.000000
4    0.333333
5    0.250000
dtype: float64

#### 'ffill' = forward fill NaNs before computing percentage change

In [37]:
import pandas as pd
import numpy as np

numbers = pd.Series([1, 2, 3, np.nan, 4, 5])
print(numbers.pct_change(fill_method='ffill'))

0         NaN
1    1.000000
2    0.500000
3    0.000000
4    0.333333
5    0.250000
dtype: float64


  print(numbers.pct_change(fill_method='ffill'))


#### 'bfill' = backward fill NaNs before computing percentage change

In [38]:
import pandas as pd
import numpy as np

numbers = pd.Series([1, 2, 3, np.nan, 4, 5])
print(numbers.pct_change(fill_method='bfill'))

0         NaN
1    1.000000
2    0.500000
3    0.333333
4    0.000000
5    0.250000
dtype: float64


  print(numbers.pct_change(fill_method='bfill'))


In [39]:
[1, 2, 3, np.nan, 4, 5]
[1, 2, 3, 3, 4, 5]
[1, np.nan, np.nan, np.nan, 4, 4]
[1, 1, 1, 1, 4, 4] # ffill
[1, 4, 4, 4, 4, 4] # bfill

[1, 4, 4, 4, 4, 4]

In [40]:
numbers.describe()

count    5.000000
mean     3.000000
std      1.581139
min      1.000000
25%      2.000000
50%      3.000000
75%      4.000000
max      5.000000
dtype: float64

In [None]:
fruits = pd.Series(['apple', 'orange', 'kiwi'])       # ALPHABETICAL ORDER
print('MIN : ' , fruits.min())
print('MAX : ' , fruits.max())

MIN :  apple
MAX :  orange


In [45]:
cities = pd.Series(['hyd', 'amr', 'del', 'mum', 'che', 'amr', 'hyd'])
cities


0    hyd
1    amr
2    del
3    mum
4    che
5    amr
6    hyd
dtype: object

In [46]:
print('unique : ' , cities.unique())
print('Nunique : ' , cities.nunique())

unique :  ['hyd' 'amr' 'del' 'mum' 'che']
Nunique :  5


In [None]:
'hyd' in cities  # Here, Pandas checks labels of the index, not the values. Index = 1 2 3 ....

False

In [57]:
'hyd' in list(cities)

True

### Arithmetic Operations

In [47]:
s1 = pd.Series([5, np.nan, 10], index=["A", "B", "C"])


In [48]:
s1


A     5.0
B     NaN
C    10.0
dtype: float64

In [49]:
print(s1+3)
print(s1-3)
print(s1*4)
print(s1%3)
print(s1//7)

A     8.0
B     NaN
C    13.0
dtype: float64
A    2.0
B    NaN
C    7.0
dtype: float64
A    20.0
B     NaN
C    40.0
dtype: float64
A    2.0
B    NaN
C    1.0
dtype: float64
A    0.0
B    NaN
C    1.0
dtype: float64


In [52]:
s1.add(3)
s1.sub(5)
s1.mul(4)
s1.divide(7)
s1.mod(4)

A    1.0
B    NaN
C    2.0
dtype: float64

### Broadcasting Series

In [53]:
s1 = pd.Series([1, 2, 3], ['A', 'B', 'C'])
s2 = pd.Series([4, 5, 5], ['A', 'B', 'C'])

In [None]:
print(s1 + s2)
print(s1 == s2)
print(s1.eq(s2))

A    5
B    7
C    8
dtype: int64
A    False
B    False
C    False
dtype: bool
A    False
B    False
C    False
dtype: bool
