In [1]:
#!/usr/bin/python3
# The shebang only matters when this notebook is exported and run as a script;
# inside Jupyter it is an ordinary comment.

In [2]:
from IPython.core.interactiveshell import InteractiveShell
# Display the value of every top-level expression in a cell, not only the last
# one. The cells below rely on this to show several results from a single cell.
InteractiveShell.ast_node_interactivity = "all"

In [3]:
import pandas as pd
import numpy as np 

pd.__version__

'1.4.2'

In [4]:
# Pandas Series
# one-dimensional, labeled arrays, of any data type
# a sequence of values, with associated labels

students = ['Alpha', 'Bravo', 'Charlie']
type(students)

list

In [5]:
pd.Series(students)

0      Alpha
1      Bravo
2    Charlie
dtype: object

In [6]:
ages = [23, 45, 12]
pd.Series(ages)

0    23
1    45
2    12
dtype: int64

In [7]:
heights = [134.3, 145.6, 175.3]
pd.Series(heights)

0    134.3
1    145.6
2    175.3
dtype: float64

In [8]:
# Pandas Series automatically infers the datatype of values

# A Series can also hold mixed datatypes: like a NumPy array of arbitrary
# Python objects, it then falls back to the generic `object` dtype

mixed = [True, 'say', {'a':'b'}]
pd.Series(mixed)

0          True
1           say
2    {'a': 'b'}
dtype: object

In [9]:
pd.Series(students)

pd.Series(data=students)

# data is the parameter, and students is the value (argument) that we pass
# to the data parameter

0      Alpha
1      Bravo
2    Charlie
dtype: object

0      Alpha
1      Bravo
2    Charlie
dtype: object

In [10]:
# Build a Series from a plain list; pandas attaches the default labels 0, 1, 2, ...
books_list = ['Fooled by Randomness', 'Sapiens', 'It Ends with Us']
list_s = pd.Series(data=books_list)

# Build the same Series from a dict whose keys spell out those labels explicitly
books_dict = {
    0: 'Fooled by Randomness',
    1: 'Sapiens',
    2: 'It Ends with Us',
}
dict_s = pd.Series(data=books_dict)

# Same values and same labels, so the two Series compare equal
list_s.equals(dict_s)

# A Series behaves much like a Python dict: the dict keys become the index labels
books_dict_new = {
    12: 'Fooled by Randomness',
    14: 'Sapiens',
    25: 'It Ends with Us',
}
pd.Series(data=books_dict_new)

True

12    Fooled by Randomness
14                 Sapiens
25         It Ends with Us
dtype: object

In [11]:
# Pandas is not critically dependent on labels or implied labels
# to be provided as inputs

# It defaults to integer labels when labels are not provided

pd.Series('Andy')

pd.Series(1)

0    Andy
dtype: object

0    1
dtype: int64

In [12]:
# Pandas automatically infers dtype when not provided

ages 
pd.Series(ages)

[23, 45, 12]

0    23
1    45
2    12
dtype: int64

In [13]:
pd.Series(ages, dtype='float')

0    23.0
1    45.0
2    12.0
dtype: float64

In [14]:
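# The dtype of a Series containing strings is object
# The reason is NumPy: by default pandas keeps the values in a NumPy array,
# and NumPy represents arbitrary Python objects such as str with dtype object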

students

names_series = pd.Series(students)
names_series 

names_series.dtype


['Alpha', 'Bravo', 'Charlie']

0      Alpha
1      Bravo
2    Charlie
dtype: object

dtype('O')

In [15]:
# dtype('O')
# NumPy stores array elements next to each other in memory, and arrays are homogeneous
# However, strings have variable length
# So NumPy stores pointers to the string objects rather than the strings themselves

heights2 = [134.3, '145.6', 175.3]
pd.Series(heights2)

0    134.3
1    145.6
2    175.3
dtype: object

In [16]:
# Index and RangeIndex

books_list

['Fooled by Randomness', 'Sapiens', 'It Ends with Us']

In [17]:
list_s

0    Fooled by Randomness
1                 Sapiens
2         It Ends with Us
dtype: object

In [18]:
pd.Series(data=books_list, index=['funny', 'anthro', 'emotional'], dtype='object')

funny        Fooled by Randomness
anthro                    Sapiens
emotional         It Ends with Us
dtype: object

In [19]:
pd.__version__

# The dedicated 'string' dtype (pd.StringDtype) used below requires pandas >= 1.0

'1.4.2'

In [20]:
pd.Series(data=books_list, index=['funny', 'anthro', 'emotional'], dtype='string')

funny        Fooled by Randomness
anthro                    Sapiens
emotional         It Ends with Us
dtype: string

In [21]:
# RangeIndex: a built-in pandas index that represents a sequence of integers
# with a fixed difference between them, given by its step parameter
list_s.index
type(list_s.index)

RangeIndex(start=0, stop=3, step=1)

pandas.core.indexes.range.RangeIndex

In [22]:
# Let's create pandas RangeIndex objects directly and list their values
list(pd.RangeIndex(start=5, stop=9, step=1))

list(pd.RangeIndex(start=10, stop=-11, step=-2))

[5, 6, 7, 8]

[10, 8, 6, 4, 2, 0, -2, -4, -6, -8, -10]

In [23]:
# Why do we need a RangeIndex?
# RangeIndex is an immutable object
# When no index is specified, pandas defaults to a RangeIndex of integers starting at 0

In [24]:
# Intelligible naming conventions: give the Series a descriptive name.
# .copy() makes books_series an independent object instead of a second name for
# list_s, so later changes to books_series (such as setting its name) do not
# silently alter list_s as well.

books_series = list_s.copy()
books_series

0    Fooled by Randomness
1                 Sapiens
2         It Ends with Us
dtype: object

In [25]:
# Attribute vs. method

# An attribute is a property of the object - a variable bound to the object
books_series.size 
books_series.dtype

# A method is an action applied to an object - a function bound to the object

list_s.equals(dict_s)

3

dtype('O')

True

In [26]:
# The name attribute

books_series.name = "my favorite books"
books_series.name

books_series

# Why do we need the name attribute?
# The name of a Series becomes the column name when it is placed in a DataFrame

'my favorite books'

0    Fooled by Randomness
1                 Sapiens
2         It Ends with Us
Name: my favorite books, dtype: object

In [27]:
books_series.index
books_series.index.name

books_series.index.name = 'My Books'

books_series.index.name


RangeIndex(start=0, stop=3, step=1)

'My Books'

In [28]:
# Skills challenge

# 1. Build a Python list of four of your favorite actors (a list of strings)
#    and call it actor_names.

actor_names = [
    'Sean Connery',
    'Alia Bhatt',
    'Meryl Streep',
    'Morgan Freeman',
]
actor_names

# 2. Build a second list of the same length holding your guess of each actor's
#    age (integers or floats are both fine) and call it actor_ages.

actor_ages = [90, 29, 72, 84]
actor_ages

# 3. Store the ages in a Series labeled by actor name: names form the index,
#    ages are the values. Call this Series actors.

actors = pd.Series(actor_ages, index=actor_names, dtype=int)
print("Creating Series from data and index lists")
actors

# Bonus: repeat step 3, this time creating the Series from a Python dictionary
# that is built dynamically from the actor_names and actor_ages lists.

actors_dict = dict(zip(actor_names, actor_ages))
actors_series = pd.Series(data=actors_dict, dtype=int)
print("Creating Series from dictionary")
actors_series

# Another way: build the same mapping with a dict comprehension

actors_dict_2 = {actor: age for actor, age in zip(actor_names, actor_ages)}
actors_series = pd.Series(data=actors_dict_2, dtype=int)

['Sean Connery', 'Alia Bhatt', 'Meryl Streep', 'Morgan Freeman']

[90, 29, 72, 84]

Creating Series from data and index lists


Sean Connery      90
Alia Bhatt        29
Meryl Streep      72
Morgan Freeman    84
dtype: int64

Creating Series from dictionary


Sean Connery      90
Alia Bhatt        29
Meryl Streep      72
Morgan Freeman    84
dtype: int64