- The Series is one of the core data structures in pandas.
- You can think of it as a cross between a list and a dictionary.
- A Series holds a single list of values together with an index, whereas a DataFrame is made up of one or more Series; in other words, a DataFrame is a collection of Series that can be used to analyse data.


### Series

#### Indexing Series

In [None]:
# Let's import pandas to get started
import pandas as pd #series and dataframe
import numpy as np #Arrays

In [None]:

# Build a series from a plain list of names; since no index is given,
# pandas labels the entries 0, 1, 2.
students = ["Alice", "Jack", "Molly"]
a = pd.Series(data=students)
a

In [None]:
# Iterating over a series yields its values one by one
for i in a:
    print(i)


In [None]:
 
# The same construction works for a list of whole numbers
numbers = [1, 2, 3]
pd.Series(data=numbers)


#### Dealing with Missing data

In [None]:

# A list of names where the last entry is missing (None)
students = ["Alice", "Jack", None]
a = pd.Series(data=students)
a

In [None]:
# The same experiment with numbers: a numeric list containing None
numbers = [1, 2, None]
# Convert the list into a series; the missing entry is stored as NaN
numbers_series = pd.Series(data=numbers)
numbers_series

In [None]:
# Not the correct way to check for a missing value: NaN never compares equal
# to anything, so this is False even though the entry at label 2 is missing.
numbers_series[2] == np.nan

In [None]:


# NaN is *not* equivalent to None: when we try the equality test, the result is False.

# Let's bring in numpy, which allows us to generate a NaN value
import numpy as np
# And let's compare it to None
np.nan == None

In [None]:
# It turns out that you actually can't do an equality test of NaN against itself:
# when you do, the answer is always False.

np.nan == np.nan  # incorrect way to test for NaN

In [None]:
np.isnan(np.nan)  # the correct way to test for NaN

In [None]:
# Apply the same check to the missing entry of the numeric series
np.isnan(numbers_series[2])

#### Series from dictionaries

In [None]:
# Build a series from a dictionary: the keys become the index labels and
# the values become the data.
students_scores = dict(Alice='Physics', Jack='Chemistry', Molly='English')
s = pd.Series(data=students_scores)
s

In [None]:
# Alternatively, pass the values and the index labels as two separate lists
s = pd.Series(
    data=['Physics', 'Chemistry', 'English'],
    index=['Alice', 'Jack', 'Molly'],
)
s

In [None]:
# Once the series has been created, its index object is available through
# the `index` attribute.

s.index

In [None]:
# Let's create a more complex type of data, say, a list of
# (first name, last name) tuples.
students = [
    ("Alice", "Brown"),
    ("Jack", "White"),
    ("Molly", "Green"),
]
pd.Series(data=students)


In [None]:

students_scores = dict(Alice='Physics', Jack='Chemistry', Molly='English')

# Build a series from the dictionary above but with an explicit index.
# Only the listed labels are kept, in the listed order: 'Sam' and 'Mehmet'
# are not in the dictionary, so they get a missing value, and 'Alice' is
# left out because she is not in the index list.
wanted_labels = ['Molly', 'Sam', 'Mehmet', 'Jack']
s = pd.Series(students_scores, index=wanted_labels)
s

#### Querying Series

In [26]:
import pandas as pd

# One class per student; the student names become the index labels
students_classes = dict(
    Alice='Physics',
    Jack='Chemistry',
    Molly='English',
    Sam='History',
)
s = pd.Series(data=students_classes)
s

Alice      Physics
Jack     Chemistry
Molly      English
Sam        History
dtype: object

In [None]:
# Plain [] with an integer on a string-labelled series.
# NOTE(review): this positional fallback is reported as deprecated in recent
# pandas releases; confirm against the installed version, s.iloc[0] is explicit.
s[0]

In [27]:
# iloc is position-based: position 2 is the third entry
s.iloc[2]

'English'

In [None]:
# loc is label-based, and the labels here are student names, so 0 is not a label.
# NOTE(review): this is expected to raise a KeyError; confirm that the error is
# the intended demonstration.
s.loc[0]

In [None]:
# Label-based lookup of the entry labelled "Alice"
s.loc["Alice"]

In [None]:
# On this string-labelled series, s[0] is meant to behave like s.iloc[0].
# NOTE(review): relying on that fallback is reported as deprecated in recent
# pandas releases; confirm against the installed version.
s[0]

In [None]:
# Plain [] with a label performs a label-based lookup, like s.loc["Alice"]
s["Alice"]

In [None]:
# Integer labels that do not start at 0: codes 3..6 mapped to class names
class_names = ['Physics', 'Chemistry', 'English', 'History']
class_code = dict(enumerate(class_names, start=3))
s = pd.Series(data=class_code)
s

In [None]:
# With an integer index (3..6), [] is treated as a label lookup, so 0 is not
# found and this raises an error. Be careful: prefer the explicit loc / iloc
# accessors wherever possible.
s[0]

In [None]:
s = pd.Series([1, 2, 3])

# Assigning through loc to a label that does not exist yet adds a new entry.
# Here both the new label and the new value are strings, unlike the existing ints.
s.loc['History'] = "102.2"

s

### Non-unique index values:


In [22]:
# Index labels do not have to be unique: all three classes belong to Kelly
kelly_classes = pd.Series(
    data=['Philosophy', 'Arts', 'Math'],
    index=['Kelly'] * 3,
)
kelly_classes

Kelly    Philosophy
Kelly          Arts
Kelly          Math
dtype: object

In [25]:
# One class per student. The mapping is named `classes_by_student` rather than
# `dict` so that the builtin `dict` is not shadowed for the rest of the session.
classes_by_student = {
    'Alice': 'Physics',
    'Jack': 'Chemistry',
    'Molly': 'English',
    'Sam': 'History',
}
students_classes = pd.Series(classes_by_student)

# Series.append() was removed in pandas 2.0; pd.concat() is the supported way
# to stack series, and it keeps the repeated 'Kelly' labels.
all_students_classes = pd.concat([students_classes, kelly_classes])

# This creates a series which has our original people in it as well as all of Kelly's courses
all_students_classes


Alice       Physics
Jack      Chemistry
Molly       English
Sam         History
Kelly    Philosophy
Kelly          Arts
Kelly          Math
dtype: object

### Faster built-in operations: vectorization

In [28]:
# Generate 10,000 random integers drawn from 0..999
numbers = pd.Series(np.random.randint(low=0, high=1000, size=10000))
# Peek at the first few entries with the head() function
numbers.head()


0     69
1    508
2    161
3    656
4    435
dtype: int32

In [None]:
# Confirm how many values the series holds
len(numbers)

In [None]:
# vectorization of summation
# Slow way: accumulate the total one element at a time in a Python loop
total1 = 0
for i in numbers:
    total1+=i



# Vectorized way: a single numpy call, which is much faster
total2 = np.sum(numbers)



In [None]:
# vectorization of addition
# Slow way: visit every (label, value) pair and write the new value back.
# Series.iteritems() and Series.set_value() have been removed from pandas;
# Series.items() and the .at accessor are their replacements.
for label, value in numbers.items():
    # print(label, value)
    numbers.at[label] = value + 2

# or, the vectorized way: one expression updates every element at once.
# NOTE: the loop above and this statement both run, so this cell adds 4 in total.
numbers += 2

