In [1]:
import pandas as pd

In [3]:
#a plain python list holding the student names
students = ["alice", "max", "brian"]
#handing the list to the pandas Series constructor turns it into a series
pd.Series(data=students)

0    alice
1      max
2    brian
dtype: object

In [4]:
#the same constructor also accepts a list of numbers
numbers = [1, 2, 3, 4]
pd.Series(data=numbers)

0    1
1    2
2    3
3    4
dtype: int64

In [None]:
#we can see that pandas automatically infers the dtype of each series: object for the strings and int64 for the integers, as shown above

## Querying a series

In [19]:
#a pandas series can be queried either by index position or by index label.
#if you do not supply an index for the series, the position and the label are effectively the same thing.
#to query by numeric location (starting at zero) use the iloc attribute;
#to query by index label use the loc attribute.

import pandas as pd
#student name -> class; the names become the index labels of the series
student_classes = dict(alice='physics', jack='chemistry', molly='english', sam='history')
ss = pd.Series(data=student_classes)
ss

alice      physics
jack     chemistry
molly      english
sam        history
dtype: object

In [20]:
#the fourth item sits at position 3, because python indexing starts at zero,
#so we ask for it by position through the iloc attribute
ss.iloc[3]

'history'

In [21]:
#to see which class molly is taking we query by index label through the loc attribute
ss.loc['molly']

'english'

In [22]:
#it is important to note that iloc and loc are attributes, not methods; that is why we index them with square brackets and not parentheses

#older pandas releases also offered a "smart" shortcut on the series itself: an integer passed to the indexing
#operator of a label-indexed series (ss[3]) was treated as a position. that fallback is deprecated and is an
#error in newer releases, so we ask for the position explicitly through iloc instead
ss.iloc[3]


'history'

In [23]:
#and if you pass in a label (here a string) it will query as if you wanted to use the loc attribute
ss['molly']

'english'

In [28]:
#so what happens if your index is a list of integers? it gets a bit complicated, because pandas cannot determine
#automatically whether you intend to query by index position or by index label, so be careful when using the
#indexing operator on the series itself. the safer option is to be explicit and use the iloc or loc attributes.
#note: the keys below are written as strings ('99'), so the labels of this series are strings, not integers

class_codes = dict(zip(['99', '100', '101', '102'],
                       ['physics', 'chemistry', 'english', 'history']))
s = pd.Series(data=class_codes)
s

99       physics
100    chemistry
101      english
102      history
dtype: object

In [29]:
#with integer labels, s[0] would raise a KeyError because no item carries the label 0.
#here the labels are strings, so the recorded run treated 0 as a position and returned the first item.
#NOTE(review): this positional fallback is deprecated in recent pandas - prefer s.iloc[0]
s[0]

'physics'

In [30]:
#i did not get the KeyError the tutor described: my labels are strings ('99'), not integers,
#so s[0] above was interpreted as a position rather than as a label
#let me try the same lookup explicitly with the iloc attribute
s.iloc[0]

'physics'

In [32]:
#the label-based lookup needs the string label '99', because the keys of class_codes were written as strings
s.loc['99']

'physics'

In [33]:
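#i am now convinced the difference from the tutor's result comes from my string labels ('99') rather than from a pandas update: iloc[0] and loc['99'] both return 'physics'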

In [34]:
#now that we know how to get data out of a series, let's talk about working with the data.
#a common task is to consider all the values inside a series and do some sort of operation on them.
#this could be finding a certain number, summarizing the data, or transforming the data in some way


In [35]:
#a typical programmatic approach would be to iterate over all the items in the series and apply the operation to each one.
#for instance we can create a series of integers representing student grades and compute the average grade

grades = pd.Series([90,80,70,60])

#first accumulate the sum of all grades ...
total = 0
for grade in grades:
    total = total + grade
#... then divide once, after the loop has seen every grade (dividing inside the loop only produced throwaway partial values)
mean = total/len(grades)
mean

75.0

In [None]:
#this works but is really slow. modern computers can do many tasks simultaneously, especially (but not only) tasks involving mathematics.
#pandas and the underlying numpy library support a method of computation called vectorization.
#vectorization works with most of the functions in the numpy library, including the sum function


In [36]:
#here is how we would really write the code, using the numpy sum function.
#first we import the numpy module
import numpy as np

#then we call np.sum and pass in an iterable item, in this case our pandas series,
#and divide the sum by the number of grades to get the average
total = np.sum(grades)
print(total / len(grades))

75.0


## Adding to a series

In [38]:
#the loc attribute can also be used to add new entries to a series
x = pd.Series(data=[1, 2, 2, 3, 4])

#assigning to a label that does not exist yet adds a new entry under that label
x.loc['new'] = 7
x


0      1
1      2
2      2
3      3
4      4
new    7
dtype: int64

In [40]:
#a series of students (index labels) and the course each of them has taken (values)
student_classes = pd.Series(
    data=['english', 'physics', 'maths', 'chemistry'],
    index=['alice', 'ken', 'jack', 'owen'],
)
student_classes

alice      english
ken        physics
jack         maths
owen     chemistry
dtype: object

In [47]:
#now a series just for a new student, kelly, listing all the courses she is taking:
#the course names are the data and every entry shares the same index label, 'kelly'

kelly_classes = pd.Series(data=['philosophy', 'maths', 'arts'], index=['kelly'] * 3)
kelly_classes

kelly    philosophy
kelly         maths
kelly          arts
dtype: object

In [48]:
#finally we combine kelly's classes with the student classes.
#Series.append was removed in pandas 2.0, so we use pd.concat, which likewise returns a new series
#with the entries of the first series followed by the entries of the second

all_student_classes = pd.concat([student_classes, kelly_classes])

all_student_classes

alice       english
ken         physics
jack          maths
owen      chemistry
kelly    philosophy
kelly         maths
kelly          arts
dtype: object

In [49]:
#combining the two series does not change the underlying series objects; it returns a new series made up of the two joined together.
#this is a common pattern in pandas: by default an operation returns a new object instead of modifying the existing one in place.
#displaying the original series shows that it has not changed
student_classes

alice      english
ken        physics
jack         maths
owen     chemistry
dtype: object

In [50]:
#finally, when we query the combined series for kelly we do not get a single value but a series,
#because several entries share the index label 'kelly'
all_student_classes.loc['kelly']

kelly    philosophy
kelly         maths
kelly          arts
dtype: object

In [3]:
import pandas as pd

In [None]:
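#NOTE: the lines below look like a leftover fragment of a pasted traceback; they are not runnable code and are
#kept only as comments. the same figures appear as the values of the `countries` dict in the next cell.
#15364344302990.0,6348608932836.1,5542207638235.176,3493025339072.8477,2681724635761.589,
#         10  2487906661418.4175,2189794143774.905,2120175089933.7761,1769297396603.86,1660647466307.512,
#    ---> 11  1565459478480.661,1418078278145.694,1164042729991.427,1106714508244.852,444155754051.095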

In [7]:
#map each country name to its numeric value; the names become the index labels of the series
countries = {
    'United States': 15364344302990.0,
    'China': 6348608932836.1,
    'Japan': 5542207638235.176,
    'Germany': 3493025339072.8477,
    'France': 2681724635761.589,
    'United Kingdom': 2487906661418.4175,
    'Brazil': 2189794143774.905,
    'Italy': 2120175089933.7761,
    'India': 1769297396603.86,
    'Canada': 1660647466307.512,
    'Russian Federation': 1565459478480.661,
    'Spain': 1418078278145.694,
    'Australia': 1164042729991.427,
    'South Korea': 1106714508244.852,
    'Iran': 444155754051.095,
}
#note: this rebinds s, which earlier in the notebook held the class-code series
s = pd.Series(data=countries)
s

United States         1.536434e+13
China                 6.348609e+12
Japan                 5.542208e+12
Germany               3.493025e+12
France                2.681725e+12
United Kingdom        2.487907e+12
Brazil                2.189794e+12
Italy                 2.120175e+12
India                 1.769297e+12
Canada                1.660647e+12
Russian Federation    1.565459e+12
Spain                 1.418078e+12
Australia             1.164043e+12
South Korea           1.106715e+12
Iran                  4.441558e+11
dtype: float64