In [1]:
import pandas as pd
import numpy as np
%config completer.use_jedi = False #for auto complete

In [3]:
# Evaluating the bare name shows the class object itself; no Series is built here.
pd.Series

pandas.core.series.Series

In [4]:
# Build an empty Series. The dtype is given explicitly because the default
# dtype of an empty Series differs between pandas versions (and pandas 1.x
# warns when it is omitted).
pd.Series(dtype="float64")

  pd.Series()


Series([], dtype: float64)

In [5]:
# Plain Python list that will supply the values of a new Series.
ice_cream_flavors = ["Chocolate", "Vanilla", "Strawberry", "Rum Raisin"]

In [6]:
# No index is passed, so the rows get the default integer labels starting at 0.
pd.Series(ice_cream_flavors)

0     Chocolate
1       Vanilla
2    Strawberry
3    Rum Raisin
dtype: object

In [9]:
# The first positional argument is `data`, so both calls build the same Series;
# `==` compares them element by element.
pd.Series(ice_cream_flavors) == pd.Series(data=ice_cream_flavors)

0    True
1    True
2    True
3    True
dtype: bool

In [10]:
# The data and index arguments may be different kinds of objects (here a list
# and a tuple), but they must have the same length so that pandas can pair
# each value with its label.

ice_cream_flavors = ["Chocolate", "Vanilla", "Strawberry", "Rum Raisin"]

days_of_week = (
    "Monday",
    "Wednesday",
    "Friday",
    "Saturday",
)

In [11]:
# Positional form: the first argument is the data, the second is the index.
# Its result is not shown, because a cell only echoes its last expression.
pd.Series(ice_cream_flavors, days_of_week)
# Keyword form of the same call; this is the Series that gets displayed.
pd.Series(index=days_of_week, data=ice_cream_flavors)

# ice_cream_flavors is a list and days_of_week is a tuple, yet pandas still
# pairs them up position by position.

Monday        Chocolate
Wednesday       Vanilla
Friday       Strawberry
Saturday     Rum Raisin
dtype: object

In [12]:
# Force a data type: the integers are stored as floats.
lucky_numbers = [4, 8, 15, 16, 23, 42]
pd.Series(lucky_numbers, dtype="float")

# Keyword arguments can be given in any order, so
# pd.Series(dtype="float", data=lucky_numbers) builds the same Series.
# Order only matters for positional arguments: pd.Series(lucky_numbers, "float")
# would bind "float" to index, because index is the second parameter.
# Tip: inside pd.Series( press Shift + Tab to see the constructor parameters.

0     4.0
1     8.0
2    15.0
3    16.0
4    23.0
5    42.0
dtype: float64

In [13]:
# To store tuples in a Series, wrap them in a list. Tuples suit row values made
# of several components, such as an address or, as here, an RGB color.

rgb_colors = [
    (120, 41, 26),
    (196, 165, 45),
]
pd.Series(rgb_colors)


0     (120, 41, 26)
1    (196, 165, 45)
dtype: object

In [14]:
# Random integers via NumPy's randint.

# Draw 10 integers from 1 to 100 (the upper bound, 101, is exclusive).
random_data = np.random.randint(low=1, high=101, size=10)
random_data

array([ 8,  6, 42, 97, 47,  4, 76, 11,  4, 17])

In [15]:
# Displaying the variable again shows the same array; nothing is re-drawn.
random_data

array([ 8,  6, 42, 97, 47,  4, 76, 11,  4, 17])

In [16]:
# Calling randint again replaces random_data with a fresh draw of 10 integers
# from 1 to 100 (the upper bound, 101, is exclusive).
random_data = np.random.randint(low=1, high=101, size=10)
random_data

array([50, 34, 73, 12, 96, 16,  9, 81, 48, 67])

In [17]:
# Another fresh draw of 10 integers from 1 to 100 (101 is exclusive); this is
# the array used for the Series that follows.
random_data = np.random.randint(low=1, high=101, size=10)
random_data

array([94, 40, 74, 10, 22, 96, 39, 42, 10, 63])

In [18]:
# A NumPy array can be passed directly as the data of a Series.
pd.Series(random_data)

0    94
1    40
2    74
3    10
4    22
5    96
6    39
7    42
8    10
9    63
dtype: int32

### Retrieving the first and last rows

In [19]:
# Python's built-in range generates a sequence of numbers from a lower bound up
# to (but not including) an upper bound, stepping by the given interval.

values = range(0, 500, 5)
nums = pd.Series(values)
nums

0       0
1       5
2      10
3      15
4      20
     ... 
95    475
96    480
97    485
98    490
99    495
Length: 100, dtype: int64

In [20]:
# head(4) returns the first four rows.
nums.head(4)

0     0
1     5
2    10
3    15
dtype: int64

In [21]:
# Called without an argument, tail() returns the last five rows.
nums.tail()

95    475
96    480
97    485
98    490
99    495
dtype: int64

## Mathematical operations

### Statistical operations

In [22]:
# A single missing value (np.nan) in the data makes pandas store the integers
# as floating-point values.
numbers = pd.Series(data=[1, 2, 3, np.nan, 4, 5])
numbers

0    1.0
1    2.0
2    3.0
3    NaN
4    4.0
5    5.0
dtype: float64

In [23]:
numbers.count()
# count() counts only the non-null values, so the NaN row is left out.

5

In [26]:
# By default sum() skips the NaN. This result is not displayed, because a cell
# only echoes its last expression.
numbers.sum()

# To take the null value into account as well, pass skipna=False.

numbers.sum(skipna=False)
# The output is then nan, because pandas cannot know the missing value.

nan

In [27]:
# mean() ignores the NaN and averages the five remaining values.
numbers.mean()

3.0

In [29]:
# product() multiplies the non-null values: 1 * 2 * 3 * 4 * 5.
numbers.product()

120.0

In [30]:
numbers.std() # sample standard deviation of the non-null values (pandas defaults to ddof=1)

1.5811388300841898

In [31]:
# unique() returns a NumPy ndarray holding each distinct value of the Series once.

authors = pd.Series(["Hemingway", "Orwell", "Dostoevsky", "Fitzgerald", "Orwell"])

authors.unique()  # "Orwell" appears only once in the output

array(['Hemingway', 'Orwell', 'Dostoevsky', 'Fitzgerald'], dtype=object)

In [32]:
# Operations (arithmetic or comparison) between Series become trickier when the
# indices differ: one index may have more or fewer labels than the other, or
# the labels themselves may not match.

s1 = pd.Series([5, 10, 15], index=["A", "B", "C"])

s2 = pd.Series([4, 8, 12, 14], index=["B", "C", "D", "E"])

In [33]:
s1 + s2
# pandas aligns the two Series by index label and fills in NaN wherever a label
# has no match in the other Series, i.e. for A, D and E.

A     NaN
B    14.0
C    23.0
D     NaN
E     NaN
dtype: float64

In [34]:
# Three city names followed by a missing value.
cities = pd.Series(
    ["San Francisco", "Los Angeles", "Las  Vegas", np.nan],
)

In [35]:
# len() counts every row, including the one holding NaN.
len(cities)

4

In [36]:
# cities is a pandas Series object.
type(cities)

pandas.core.series.Series

In [37]:
 # series can be converted to dictionary or list
    
# list() collects the Series values, NaN included, into a plain Python list.
list(cities)

['San Francisco', 'Los Angeles', 'Las  Vegas', nan]

In [38]:
# list(cities) produced a new object; cities itself is still a Series.
type(cities)

pandas.core.series.Series

In [39]:
# The converted object is a built-in Python list.
type(list(cities))

list

In [40]:
# dict() maps each index label to its value.
dict(cities)

{0: 'San Francisco', 1: 'Los Angeles', 2: 'Las  Vegas', 3: nan}

In [41]:
# The converted object is a built-in Python dict.
type(dict(cities))

dict

In [42]:
# Parallel sequences: the i-th strength level belongs to the i-th superhero.
superheroes = [
    "Batman",
    "Superman",
    "Spider-Man",
    "Iron Man",
    "Captain America",
    "Wonder Woman",
]

strength_levels = (100, 120, 90, 95, 110, 120)

In [43]:
# Convert each sequence to a Series of its own, starting with the hero names.

pd.Series(superheroes)

0             Batman
1           Superman
2         Spider-Man
3           Iron Man
4    Captain America
5       Wonder Woman
dtype: object

In [44]:
# The tuple of strength levels converts the same way.
pd.Series(strength_levels)

0    100
1    120
2     90
3     95
4    110
5    120
dtype: int64

In [46]:
# Combine both sequences: strength levels are the values, hero names the labels.
heroes = pd.Series(strength_levels, index=superheroes)

In [47]:
# Show the resulting Series: the hero names are the index labels.
heroes

Batman             100
Superman           120
Spider-Man          90
Iron Man            95
Captain America    110
Wonder Woman       120
dtype: int64

In [48]:
# head(2) returns the first two rows.
heroes.head(2)

Batman      100
Superman    120
dtype: int64

In [49]:
# tail(4) returns the last four rows.
heroes.tail(4)

Spider-Man          90
Iron Man            95
Captain America    110
Wonder Woman       120
dtype: int64

In [50]:
# Number of unique values: 120 appears twice, so the result is 5 rather than 6.
heroes.nunique()

5

In [51]:
# Smallest strength level in the Series.
heroes.min()

90

In [53]:
# Capture both extremes of the strength levels and show them as a tuple.
min_s, max_s = heroes.min(), heroes.max()
(min_s, max_s)

(90, 120)

In [54]:
# Series to dict: the index labels (hero names) become the keys.
dict(heroes)

{'Batman': 100,
 'Superman': 120,
 'Spider-Man': 90,
 'Iron Man': 95,
 'Captain America': 110,
 'Wonder Woman': 120}