In [34]:
# series - core data structure in pandas
import pandas as pd
# A Series can be created from any array-like object; a plain Python list is
# one of the easiest options.
students = ["A", "B", "C"]

# Hand the list to the pandas Series constructor. No index is given here, and
# the displayed result is labelled 0, 1, 2.
pd.Series(data=students)

0    A
1    B
2    C
dtype: object

In [35]:
# A list of numbers (integers or floats) that also contains None is handled
# differently: pandas stores the None as NaN ("Not a Number"), a special
# floating point value.

# Build such a list, with None as its last element ...
numbers = [1, 2, None]
# ... and wrap it in a Series.
pd.Series(data=numbers)

0    1.0
1    2.0
2    NaN
dtype: float64

In [36]:
# NaN is not the same value as None. In the previous cell pandas set the dtype
# of the series to float64 rather than object or an integer type: NaN is
# represented as a floating point number, and because integers can be cast to
# floats, pandas converted our integers to floats.
# So if you are wondering why a list of integers you put into a Series came
# out as floats, it is probably because some of the data is missing.
import numpy as np
# Compare NaN with None: the equality test evaluates to False.
np.nan == None

False

In [37]:
# NaN does not even compare equal to itself: an equality test of NaN against
# NaN always evaluates to False.
np.nan == np.nan

False

In [38]:
# Instead, test for the presence of NaN with a dedicated function, such as
# NumPy's isnan().

np.isnan(np.nan)

np.True_

In [39]:
# A Series can also be built straight from a dictionary. The dictionary keys
# become the index labels, replacing the default incrementing integers.

# Example data: each student mapped to a class.
students_scores = {
    "A": "Physics",
    "B": "Chemistry",
    "C": "English",
}
s = pd.Series(data=students_scores)
s

A      Physics
B    Chemistry
C      English
dtype: object

In [40]:
# We see that, since it was string data, pandas set the data type of the series to "object".
# We see that the index, the first column, is also a list of strings.

In [41]:
# Once the series has been created, its index attribute gives us the Index
# object that holds the labels.

s.index

Index(['A', 'B', 'C'], dtype='object')

In [42]:
# The object dtype is not reserved for strings: it can hold arbitrary Python
# objects. A more complex example is a list of tuples.
students = [
    ("A", "Brown"),
    ("B", "White"),
    ("C", "Green"),
]
pd.Series(data=students)

0    (A, Brown)
1    (B, White)
2    (C, Green)
dtype: object

In [43]:
# We see that each of the tuples is stored in the series object, and the type of the 
# whole thing is object.

In [46]:
# The index does not have to come from the data: pass the labels explicitly as
# a list through the index argument.
s = pd.Series(
    data=["Phy", "Chem", "Maths"],
    index=["A", "B", "C"],
)
s

A      Phy
B     Chem
C    Maths
dtype: object

In [48]:
# Combine a dictionary with an explicit index. The resulting series follows
# the index list: 'A' and 'C' take their values from the dictionary, 'B' is
# not in the index and does not appear, and 'D' has no dictionary entry so its
# value is shown as NaN.
student_scores = {
    "A": "Phy",
    "B": "Chem",
    "C": "Maths",
}
s = pd.Series(data=student_scores, index=["A", "C", "D"])
s

A      Phy
C    Maths
D      NaN
dtype: object