In [1]:
import pandas as pd  # This is the standard way of importing the Pandas library
import numpy as np

In [3]:
wh = pd.read_csv("https://raw.githubusercontent.com/csmastersUH/data_analysis_with_python_2020/master/kumpula-weather-2017.csv")
wh.head()  # The head method prints the first 5 rows

Unnamed: 0,Year,m,d,Time,Time zone,Precipitation amount (mm),Snow depth (cm),Air temperature (degC)
0,2017,1,1,00:00,UTC,-1.0,-1.0,0.6
1,2017,1,2,00:00,UTC,4.4,-1.0,-3.9
2,2017,1,3,00:00,UTC,6.6,7.0,-6.5
3,2017,1,4,00:00,UTC,-1.0,13.0,-12.8
4,2017,1,5,00:00,UTC,-1.0,10.0,-17.8


In [5]:
wh["Snow depth (cm)"].head()  # Using the tab key can help enter long column names

0    -1.0
1    -1.0
2     7.0
3    13.0
4    10.0
Name: Snow depth (cm), dtype: float64

In [7]:
wh["Air temperature (degC)"].mean()  # Mean temperature

6.527123287671233

In [9]:
wh.drop("Time zone", axis=1).head()  # Return a copy with one column removed,
# the original DataFrame stays intact

Unnamed: 0,Year,m,d,Time,Precipitation amount (mm),Snow depth (cm),Air temperature (degC)
0,2017,1,1,00:00,-1.0,-1.0,0.6
1,2017,1,2,00:00,4.4,-1.0,-3.9
2,2017,1,3,00:00,6.6,7.0,-6.5
3,2017,1,4,00:00,-1.0,13.0,-12.8
4,2017,1,5,00:00,-1.0,10.0,-17.8


In [11]:
wh.head()  # Original DataFrame is unchanged

Unnamed: 0,Year,m,d,Time,Time zone,Precipitation amount (mm),Snow depth (cm),Air temperature (degC)
0,2017,1,1,00:00,UTC,-1.0,-1.0,0.6
1,2017,1,2,00:00,UTC,4.4,-1.0,-3.9
2,2017,1,3,00:00,UTC,6.6,7.0,-6.5
3,2017,1,4,00:00,UTC,-1.0,13.0,-12.8
4,2017,1,5,00:00,UTC,-1.0,10.0,-17.8


In [13]:
# Add a Boolean column to wh itself: True on rows where precipitation exceeds 5 mm
wh["Rainy"] = wh["Precipitation amount (mm)"] > 5
wh.head()

Unnamed: 0,Year,m,d,Time,Time zone,Precipitation amount (mm),Snow depth (cm),Air temperature (degC),Rainy
0,2017,1,1,00:00,UTC,-1.0,-1.0,0.6,False
1,2017,1,2,00:00,UTC,4.4,-1.0,-3.9,False
2,2017,1,3,00:00,UTC,6.6,7.0,-6.5,True
3,2017,1,4,00:00,UTC,-1.0,13.0,-12.8,False
4,2017,1,5,00:00,UTC,-1.0,10.0,-17.8,False


Creation and indexing of series

In [16]:
s = pd.Series([1, 4, 5, 2, 5, 2])
s

0    1
1    4
2    5
3    2
4    5
5    2
dtype: int64

In [18]:
s.name = "Grades"
s

0    1
1    4
2    5
3    2
4    5
5    2
Name: Grades, dtype: int64

In [20]:
print(f"Name: {s.name}, dtype: {s.dtype}, size: {s.size}")

Name: Grades, dtype: int64, size: 6


In [22]:
s[1]

4

In [24]:
s2 = s[[0, 5]]  # Fancy indexing
print(s2)

0    1
5    2
Name: Grades, dtype: int64


In [26]:
t = s[-2:]  # Slicing
t

4    5
5    2
Name: Grades, dtype: int64

In [28]:
t[4]  # t[0] would give an error

5

In [30]:
s2.values

array([1, 2])

In [32]:
s2.index

Index([0, 5], dtype='int64')

In [36]:
s3 = pd.Series([1, 4, 5, 2, 5, 2],
             index=list("abcdef"))
s3

a    1
b    4
c    5
d    2
e    5
f    2
dtype: int64

In [38]:
s3.index

Index(['a', 'b', 'c', 'd', 'e', 'f'], dtype='object')

In [40]:
s3["b"]

4

In [42]:
s3["b":"e"]  # Slicing by label: the end label "e" is included in the result

b    4
c    5
d    2
e    5
dtype: int64

In [44]:
s3.iloc[1]  # Positional access; plain s3[1] on a string-labelled Series is deprecated

  s3[1]


4

In [46]:
s4 = pd.Series(["Jack", "Jones", "James"], index=[1, 2, 3])
s4

1     Jack
2    Jones
3    James
dtype: object

In [48]:
print(s4.loc[1])   # .loc looks up by index label: label 1 holds "Jack"
print(s4.iloc[1])  # .iloc looks up by position: position 1 (second element) holds "Jones"

Jack
Jones


Exercise 3.13 (read series)

In [51]:
import pandas as pd


def read_series():
    """Read "index value" pairs from standard input and return them as a Series.

    Lines are read with ``input()`` until an empty line is entered. Every
    non-empty line must consist of exactly two whitespace-separated tokens:
    the first becomes the index label and the second the value. Both are
    stored as strings.

    Returns:
        pandas.Series: The values labelled by the given indices, in input order.

    Raises:
        ValueError: If a non-empty line does not contain exactly two tokens.
    """
    index = []
    values = []
    while True:
        line = input()
        if len(line) == 0:
            break
        # Split once and reuse the tokens instead of re-splitting for each field.
        tokens = line.split()
        if len(tokens) != 2:
            raise ValueError(
                f"expected 'index value' separated by whitespace, got {line!r}"
            )
        label, value = tokens
        index.append(label)
        values.append(value)
    return pd.Series(values, index=index)

def main():
    """Read a Series from standard input and print it."""
    series = read_series()
    print(series)

if __name__ == "__main__":
    main()

 0 a
 1 b
 2 c
 


0    a
1    b
2    c
dtype: object


Suggested solution:

In [56]:
import pandas as pd
 
def read_series():
    """Read whitespace-separated "index value" lines until an empty line.

    Each non-empty line is split into exactly two tokens; the first is used
    as the index label and the second as the value (both kept as strings).
    A line that does not split into exactly two tokens makes the tuple
    unpacking raise ValueError.

    Returns:
        pandas.Series: The collected values labelled by the collected indices.
    """
    labels, data = [], []
    # iter(callable, sentinel) keeps calling input("") until it returns "".
    for entry in iter(lambda: input(""), ""):
        label, value = entry.split()
        data.append(value)
        labels.append(label)
    return pd.Series(data, index=labels)
 
def main():
    """Print the Series built from the lines typed on standard input."""
    result = read_series()
    print(result)
 
if __name__ == "__main__":
    main()

 0 a
 1 b
 2 c
 


0    a
1    b
2    c
dtype: object


Exercise 3.14 (operations on series)

In [59]:
import pandas as pd

def create_series(L1, L2):
    """Build two Series that both use the index labels a, b and c.

    Args:
        L1: Three values for the first Series.
        L2: Three values for the second Series.

    Returns:
        tuple: ``(s1, s2)`` where ``s1`` holds the values of ``L1`` and
        ``s2`` holds the values of ``L2``.
    """
    labels = list("abc")
    first, second = (pd.Series(values, index=labels) for values in (L1, L2))
    return (first, second)
    
def modify_series(s1, s2):
    """Copy the value labelled b in s2 to label d of s1, then remove b from s2.

    Both Series are modified in place; the same two objects are returned.

    Args:
        s1: Series that receives the new entry with label ``"d"``.
        s2: Series that must contain label ``"b"``; that entry is removed.

    Returns:
        tuple: ``(s1, s2)`` after the modification.
    """
    moved = s2["b"]
    s1["d"] = moved
    # pop removes the entry in place; its return value is not needed here.
    s2.pop("b")
    return (s1, s2)
    
def main():
    """Create two Series from small lists, modify them, and print their sum."""
    first, second = create_series([1, 2, 3], [4, 5, 6])
    first, second = modify_series(first, second)
    total = first + second
    print(total)
    
if __name__ == "__main__":
    main()

a    5.0
b    NaN
c    9.0
d    NaN
dtype: float64


Suggested solution:

In [62]:
import pandas as pd
 
def create_series(L1, L2):
    """Return two Series, built from L1 and L2, indexed by a, b and c.

    Args:
        L1: Values for the first Series.
        L2: Values for the second Series.

    Returns:
        tuple: ``(s1, s2)``.
    """
    labels = ["a", "b", "c"]
    return pd.Series(L1, index=labels), pd.Series(L2, index=labels)
    
def modify_series(s1, s2):
    """Move the entry labelled b out of s2 and store its value in s1 under d.

    Both Series are changed in place and returned as ``(s1, s2)``.
    """
    # pop returns the value stored at "b" and deletes that entry from s2.
    s1["d"] = s2.pop("b")
    return (s1, s2)
    
def main():
    """Show create_series and modify_series in action.

    Prints the two Series before and after modification, then their sum,
    followed by a note about the float conversion caused by NaN.
    """
    s1, s2 = create_series([2,3,4], [9,8,7])
    print("Original:", s1, s2, sep="\n")
    s1, s2 = modify_series(s1, s2)
    print("Modified:", s1, s2, sep="\n")
    print("Addition:", s1 + s2, sep="\n")
    print("Note that the resulting type gets \n"
          "    converted to float to accomodate the missing value symbol NaN")
    
if __name__ == "__main__":
    main()

Original:
a    2
b    3
c    4
dtype: int64
a    9
b    8
c    7
dtype: int64
Modified:
a    2
b    3
c    4
d    8
dtype: int64
a    9
c    7
dtype: int64
Addition:
a    11.0
b     NaN
c    11.0
d     NaN
dtype: float64
Note that the resulting type gets 
    converted to float to accomodate the missing value symbol NaN


Exercise 3.15 (inverse series)

In [65]:
import pandas as pd

def inverse_series(s):
    """Return a new Series whose index is s's values and whose values are s's index.

    Args:
        s: The Series to invert.

    Returns:
        pandas.Series: Series with the roles of index and values swapped.
    """
    return pd.Series(s.index, index=s.values)

def main():
    """Invert a small Series that has a repeated index label and print the result."""
    sample = pd.Series([0, 1, 1, 2, 3], index=[1, 1, 1, 1, 1])
    inverted = inverse_series(sample)
    print(inverted)

if __name__ == "__main__":
    main()

0    1
1    1
1    1
2    1
3    1
dtype: int64


Suggested solution:

In [68]:
import pandas as pd
 
def inverse_series(s):
    """Swap the roles of index and values of a Series.

    Returns:
        pandas.Series: The old index as data, labelled by the old values.
    """
    swapped_values = s.index
    swapped_index = s.values
    return pd.Series(data=swapped_values, index=swapped_index)
 
def main():
    """Invert a letter-indexed example Series and print the result."""
    example = pd.Series([1,2,3,1], index=list("abcd"))
    inverted = inverse_series(example)
    print(inverted)
 
if __name__ == "__main__":
    main()

1    a
2    b
3    c
1    d
dtype: object


In [70]:
# Building a Series from a dict: the keys become the index, the values the data
d = { 2001 : "Bush", 2005 : "Bush", 2009 : "Obama", 2013 : "Obama", 2017 : "Trump"}
s4 = pd.Series(d, name="Presidents")  # Note: rebinds s4, replacing the earlier Series of names
s4

2001     Bush
2005     Bush
2009    Obama
2013    Obama
2017    Trump
Name: Presidents, dtype: object