In [2]:
import pandas as pd
import numpy as np

### Classes and Instances ( object )

In [3]:
pd.Series()          # series is class  ;  dtype is object

Series([], dtype: object)

### Populating the Series with Values

In [4]:
ice_cream_flavors = [
    "Chocolate",
    "Vanilla",
    "Strawberry",
    "Rum Raisin",
]

pd.Series(ice_cream_flavors)

0     Chocolate
1       Vanilla
2    Strawberry
3    Rum Raisin
dtype: object

### Customizing the Series Index

In [5]:
ice_cream_flavors = [
    "Chocolate",
    "Vanilla",
    "Strawberry",
    "Rum Raisin",
]

days_of_week = ("Monday", "Wednesday", "Friday", "Saturday")

# The two lines below are equivalent

pd.Series(ice_cream_flavors, days_of_week)
pd.Series(data = ice_cream_flavors, index = days_of_week)

Monday        Chocolate
Wednesday       Vanilla
Friday       Strawberry
Saturday     Rum Raisin
dtype: object

In [6]:
ice_cream_flavors = [
    "Chocolate",
    "Vanilla",
    "Strawberry",
    "Rum Raisin",
]

days_of_week = ("Monday", "Wednesday", "Friday", "Wednesday")

# The two lines below are equivalent
pd.Series(ice_cream_flavors, days_of_week)
pd.Series(data = ice_cream_flavors, index = days_of_week)

Monday        Chocolate
Wednesday       Vanilla
Friday       Strawberry
Wednesday    Rum Raisin
dtype: object

In [7]:
bunch_of_bools = [True, False, False]
pd.Series(bunch_of_bools)

0     True
1    False
2    False
dtype: bool

In [8]:
stock_prices = [985.32, 950.44]
time_of_day = ["Open", "Close"]
pd.Series(data = stock_prices, index = time_of_day)

Open     985.32
Close    950.44
dtype: float64

In [9]:
lucky_numbers = [4, 8, 15, 16, 23, 42]
pd.Series(lucky_numbers)

0     4
1     8
2    15
3    16
4    23
5    42
dtype: int64

In [10]:
lucky_numbers = [4, 8, 15, 16, 23, 42]
pd.Series(lucky_numbers, dtype = "float")

0     4.0
1     8.0
2    15.0
3    16.0
4    23.0
5    42.0
dtype: float64

### Creating a Series with Missing Values

In [11]:
temperatures = [94, 88, np.nan, 91]
pd.Series(data = temperatures)

0    94.0
1    88.0
2     NaN
3    91.0
dtype: float64

### Create a Series from Python Objects

In [12]:
calorie_info = {
    "Cereal": 125,
    "Chocolate Bar": 406,
    "Ice Cream Sundae": 342,
}

diet = pd.Series(calorie_info)
diet

Cereal              125
Chocolate Bar       406
Ice Cream Sundae    342
dtype: int64

In [13]:
print(diet.values)
print(diet.index)
print(diet.size)
print(diet.shape)
print(diet.dtype)

[125 406 342]
Index(['Cereal', 'Chocolate Bar', 'Ice Cream Sundae'], dtype='object')
3
(3,)
int64


In [14]:
pd.Series(data = ("Red", "Green", "Blue"))

0      Red
1    Green
2     Blue
dtype: object

In [15]:
rgb_colors = [(120, 41, 26), (196, 165, 45)]
pd.Series(data = rgb_colors)

0     (120, 41, 26)
1    (196, 165, 45)
dtype: object

In [16]:
my_set = {"Ricky", "Bobby"}

# pd.Series(my_set)   CAN'T PRINT COZ SETs are Un-Ordered , cant be a series

In [17]:
pd.Series(list(my_set))

0    Ricky
1    Bobby
dtype: object

In [18]:
random_data = np.random.randint(1, 101, 10)
random_data

array([ 4, 14, 78, 92, 11,  8,  3, 70, 27, 40], dtype=int32)

In [19]:
pd.Series(random_data)

0     4
1    14
2    78
3    92
4    11
5     8
6     3
7    70
8    27
9    40
dtype: int32

 ### Retrieving the First and Last Rows

In [20]:
values = range(0, 500, 5)
nums = pd.Series(data = values)
nums

0       0
1       5
2      10
3      15
4      20
     ... 
95    475
96    480
97    485
98    490
99    495
Length: 100, dtype: int64

In [21]:
print(nums.head(3))
print(nums.head(n = 3))
print(nums.head())
print(nums.tail(6))
print(nums.tail())

0     0
1     5
2    10
dtype: int64
0     0
1     5
2    10
dtype: int64
0     0
1     5
2    10
3    15
4    20
dtype: int64
94    470
95    475
96    480
97    485
98    490
99    495
dtype: int64
95    475
96    480
97    485
98    490
99    495
dtype: int64


In [22]:
numbers = pd.Series([1, 2, 3, np.nan, 4, 5])
numbers

0    1.0
1    2.0
2    3.0
3    NaN
4    4.0
5    5.0
dtype: float64

### Mathematical Operations

#### 1. Statistical Operations

In [23]:
numbers = pd.Series([1, 2, 3, np.nan, 4, 5])
numbers

0    1.0
1    2.0
2    3.0
3    NaN
4    4.0
5    5.0
dtype: float64

In [24]:
print(numbers.count())

5


In [25]:
print(numbers.sum())

15.0


In [26]:

print(numbers.sum(skipna = False))         # skipna stands for “skip NaN values”.

                                            # By default: skipna=True → ignores NaN values while summing.

                                            # If you set skipna=False:

                                            # If any NaN is present, the result will be NaN.



nan


In [27]:
numbers = pd.Series([1, 2, 3, np.nan, 4, 5])

print(numbers.sum(min_count=4))               # enough non-NaN values: 4 so adds all

print(numbers.sum(min_count=6))          # only 3 valid numbers, need 6 ----> so NAN 

15.0
nan


In [28]:
print(numbers.product())
print(numbers.product(skipna=False))

print(numbers.product(min_count=3))
print(numbers.product(min_count=6))

120.0
nan
120.0
nan


In [29]:
# CUMMILATIVE SUM

print(numbers.cumsum())

print()

print(numbers.cumsum(skipna=False))

0     1.0
1     3.0
2     6.0
3     NaN
4    10.0
5    15.0
dtype: float64

0    1.0
1    3.0
2    6.0
3    NaN
4    NaN
5    NaN
dtype: float64


#### Percentage Change

In [30]:
numbers = pd.Series([1, 2, 3, np.nan, 4, 5])

numbers.pct_change()

#  0         NaN
#  1    1.000000  (2-1)/1 = 1.0    
#  2    0.500000  (3-2)/2 = 0.5
#  3    0.000000  (3-3)/3 = 0.0
#  4    0.333333  (4-3)/3 = 0.333333
#  5    0.250000  (5-4)/4 = 0.25

  numbers.pct_change()


0         NaN
1    1.000000
2    0.500000
3    0.000000
4    0.333333
5    0.250000
dtype: float64

#### 'ffill' = forward fill NaNs before computing percentage change

In [31]:
import pandas as pd
import numpy as np

numbers = pd.Series([1, 2, 3, np.nan, 4, 5])  # F-FILL : 1 2 3 3  4 5 

print(numbers.pct_change(fill_method='ffill'))

0         NaN
1    1.000000
2    0.500000
3    0.000000
4    0.333333
5    0.250000
dtype: float64


  print(numbers.pct_change(fill_method='ffill'))


#### 'bfill' = backward fill NaNs before computing percentage change

In [32]:
import pandas as pd
import numpy as np

numbers = pd.Series([1, 2, 3, np.nan, 4, 5])         # B-FILL : 1 2 3 4 4 5
print(numbers.pct_change(fill_method='bfill'))

0         NaN
1    1.000000
2    0.500000
3    0.333333
4    0.000000
5    0.250000
dtype: float64


  print(numbers.pct_change(fill_method='bfill'))


In [33]:
# NOTE

[1, 2, 3, np.nan, 4, 5]
[1, 2, 3, 3, 4, 5]

[1, np.nan, np.nan, np.nan, 4, 4]
[1, 1, 1, 1, 4, 4] # ffill
[1, 4, 4, 4, 4, 4] # bfill


[1, 4, 4, 4, 4, 4]

In [35]:
numbers.describe()

count    5.000000
mean     3.000000
std      1.581139
min      1.000000
25%      2.000000
50%      3.000000
75%      4.000000
max      5.000000
dtype: float64

### Arithmetic Operations

In [36]:
s1 = pd.Series([5, np.nan, 10], index=["A", "B", "C"])

In [37]:
s1

A     5.0
B     NaN
C    10.0
dtype: float64

In [38]:
print(s1+3)
print(s1-3)
print(s1*4)
print(s1%3)
print(s1//7)

A     8.0
B     NaN
C    13.0
dtype: float64
A    2.0
B    NaN
C    7.0
dtype: float64
A    20.0
B     NaN
C    40.0
dtype: float64
A    2.0
B    NaN
C    1.0
dtype: float64
A    0.0
B    NaN
C    1.0
dtype: float64


In [41]:
s1.add(3)
s1.sub(5)
s1.mul(4)
s1.divide(7)
s1.mod(4)
s1.floordiv(4)


A    1.0
B    NaN
C    2.0
dtype: float64

### Broadcasting Series

In [46]:
s1 = pd.Series([1, 2, 5], ['A', 'B', 'C'])
s2 = pd.Series([4, 5, 5], ['A', 'B', 'C'])

In [47]:
print(s1 + s2)

print(s1 == s2)
print(s1.eq(s2))

s1 != s2
s1.ne(s2)

A     5
B     7
C    10
dtype: int64
A    False
B    False
C     True
dtype: bool
A    False
B    False
C     True
dtype: bool


A     True
B     True
C    False
dtype: bool

In [48]:
# CREATINF A SERIES

s1 = pd.Series(
    data = [5, 10, 15], index = ["A", "B", "C"]
)

s2 = pd.Series(
    data = [4, 8, 12, 14], index = ["B", "C", "D", "E"]
)

In [49]:
s1 + s2

A     NaN
B    14.0
C    23.0
D     NaN
E     NaN
dtype: float64

###  Passing the Series to Python's Built-In Functions

In [65]:
cities = pd.Series(
    data = ["San Francisco", "Los Angeles", "Las Vegas", np.nan]
)

In [66]:
cities

0    San Francisco
1      Los Angeles
2        Las Vegas
3              NaN
dtype: object

In [62]:
print('len \n' , len(cities))
print( '\n type \n' , type(cities))
print('\ndir \n ' , dir(cities))
print('\nlist \n ' , list(cities))
print('\n dictionary \n' , dict(cities))


len 
 4

 type 
 <class 'pandas.core.series.Series'>

dir 
  ['T', '_AXIS_LEN', '_AXIS_ORDERS', '_AXIS_TO_AXIS_NUMBER', '_HANDLED_TYPES', '__abs__', '__add__', '__and__', '__annotations__', '__array__', '__array_priority__', '__array_ufunc__', '__bool__', '__class__', '__column_consortium_standard__', '__contains__', '__copy__', '__deepcopy__', '__delattr__', '__delitem__', '__dict__', '__dir__', '__divmod__', '__doc__', '__eq__', '__finalize__', '__float__', '__floordiv__', '__format__', '__ge__', '__getattr__', '__getattribute__', '__getitem__', '__getstate__', '__gt__', '__hash__', '__iadd__', '__iand__', '__ifloordiv__', '__imod__', '__imul__', '__init__', '__init_subclass__', '__int__', '__invert__', '__ior__', '__ipow__', '__isub__', '__iter__', '__itruediv__', '__ixor__', '__le__', '__len__', '__lt__', '__matmul__', '__mod__', '__module__', '__mul__', '__ne__', '__neg__', '__new__', '__nonzero__', '__or__', '__pandas_priority__', '__pos__', '__pow__', '__radd__', '__rand__', '__

In [63]:
cities

0    San Francisco
1      Los Angeles
2        Las Vegas
3              NaN
dtype: object

In [None]:
print("Las Vegas" in cities)  # False

print("Las Vegas" in cities.values) # True

False
True


In [80]:
print(2 in cities)
print(100 not in cities)

print("Paris" not in cities.values)


True
True
True


In [83]:
superheroes = [
    "Batman",
    "Superman",
    "Spider-Man",
    "Iron Man",
    "Captain America",
    "Wonder Woman"
]

strength_levels = (100, 120, 90, 95, 110, 120)

heroes = pd.Series( data = strength_levels, index = superheroes )

heroes

Batman             100
Superman           120
Spider-Man          90
Iron Man            95
Captain America    110
Wonder Woman       120
dtype: int64