# Week 6: Pandas



In [1]:
import numpy as np

In [2]:
import pandas as pd

## Pandas series

In [3]:
# Every value is an integer, so pandas infers an integer dtype.
S1 = pd.Series(data=[1, 2, 5, 7])
print(S1)

0    1
1    2
2    5
3    7
dtype: int64


In [4]:
# A single float (5.6) upcasts the whole Series to a float dtype.
S2 = pd.Series(data=[1, 4, 5.6, 7])
print(S2)

0    1.0
1    4.0
2    5.6
3    7.0
dtype: float64


In [5]:
# Mixed types (str, int, bool, float) have no common numeric dtype,
# so the Series falls back to the generic "object" dtype.
S3 = pd.Series(data=["John", 2, True, 9.8])
print(S3)

0    John
1       2
2    True
3     9.8
dtype: object


## Boolean indexing: similar to numpy

In [6]:
# Display S1 again for reference.
S1

0    1
1    2
2    5
3    7
dtype: int64

In [7]:
# NOTE: this rebinds S2 -- the float Series created earlier under the same
# name is replaced by this integer Series from here on.
S2 = pd.Series(data=[2, 1, 4, 9])

In [8]:
# Element-wise comparison: S5 is a boolean Series that is True wherever
# the value in S1 is greater than the value in S2.
S5 = S1 > S2

In [9]:
# Show the boolean mask produced by the comparison.
print(S5)

0    False
1     True
2     True
3    False
dtype: bool


In [10]:
# Use the boolean mask as an indexer: only the entries of S1 where
# S1 > S2 is True are kept (original index labels are preserved).
S1[S1>S2]

1    2
2    5
dtype: int64

Here we can see that, as in numpy, instead of referring to exact index positions, we can pass a boolean mask (built from the condition we care about) that selects the matching elements.

The difference between a pandas Series and a normal sequence (such as a Python list) is that operations on a pandas Series are vectorized. But why do we prefer pandas?

YOU CAN PROVIDE NAMES FOR ITEMS!

In [11]:
# The second argument of pd.Series is the index: one label per value.
S6 = pd.Series(data=[1, 3, 5, 4], index=["a", "b", "c", "d"])
S6

a    1
b    3
c    5
d    4
dtype: int64

In [12]:
# Same labels as S6 but in a different order ("d" comes before "c").
S7 = pd.Series(data=[3, 5, 6, 7], index=["a", "b", "d", "c"])
S7

a    3
b    5
d    6
c    7
dtype: int64

In [13]:
# Addition aligns on index labels, not on positions: "c" pairs S6's 5 with
# S7's 7 (= 12) and "d" pairs 4 with 6 (= 10), even though the two Series
# list their labels in a different order.
S8 = S6 + S7
S8

a     4
b     8
c    12
d    10
dtype: int64

So, when adding pandas Series, the values are matched by their names (index labels) rather than by their positions!

## Use Pandas series for our application

In [14]:
# One value per building component, labelled by component name.
Q_heating = pd.Series(
    data=[1150, 1240, 12],
    index=["wall", "ceiling", "door"],
)
Q_heating

wall       1150
ceiling    1240
door         12
dtype: int64

**We can also create a Pandas series from a dictionary:**

In [15]:
# Dictionary keys become the index labels and the values become the data.
Q_heating_dict = dict(wall=1150, ceiling=1240, door=12)
Q_heating = pd.Series(data=Q_heating_dict)
Q_heating

wall       1150
ceiling    1240
door         12
dtype: int64

In [16]:
# Look up a single entry by its index label.
Q_door = Q_heating.loc["door"]
Q_door

12

In [17]:
# Shared labels so that the U-value and area Series line up item by item.
item_list = ["wall", "ceiling", "door"]

# Per-item properties (presumably U in W/m^2K and area in m^2 -- TODO confirm units).
opaque_U = pd.Series(data=[0.438, 0.25, 1.78], index=item_list)
opaque_area = pd.Series(data=[105.8, 200, 2.2], index=item_list)

# Indoor and outdoor temperatures for the heating case.
T_heating = pd.Series(data=[20, -4.8], index=["T_in", "T_out"])

In [18]:
# Indoor minus outdoor temperature for the heating case.
deltaT_heating = T_heating.loc["T_in"] - T_heating.loc["T_out"]

## Applying a function to the series

In [19]:
def toKw(inputValue):
  """Convert a value to kilo-units by dividing it by 1000 (e.g. W -> kW).

  Args:
    inputValue: a number, or any object that supports division by an int.

  Returns:
    inputValue divided by 1000.
  """
  return inputValue/1000

With ".apply", we can apply a defined function to every element of a Series:

In [20]:
# .apply calls toKw on each element and returns a new Series with the same
# index labels; Q_heating itself is not modified.
Q_heating_kW = Q_heating.apply(toKw)
Q_heating_kW

wall       1.150
ceiling    1.240
door       0.012
dtype: float64

## Data frames: series for 2D matrices

In [21]:
# One entry per resistance in every list; None marks a property that does
# not apply to that resistance's type.
# (Presumably "conv"/"cond" = convection/conduction, h = convection
# coefficient, k = conductivity, L = thickness -- TODO confirm.)
resistance_name = ["R1", "R2", "R3", "R4", "R5"]
resistance_type = ["conv", "cond", "cond", "cond", "conv"]
resistance_h = [10, None, None, None, 25]
resistance_k = [None, 0.81, 0.5, 0.05, None]
resistance_L = [None, 0.5, 0.3, 0.6, None]
# All zeros for now -- one placeholder per resistance.
resistance_R = [0] * len(resistance_name)


We want to define a list of the above lists:

In [22]:

# One inner list per property; each inner list holds one entry per resistance.
resistance_listOfList = [
    resistance_type,
    resistance_h,
    resistance_k,
    resistance_L,
    resistance_R,
]
resistance_listOfList

[['conv', 'cond', 'cond', 'cond', 'conv'],
 [10, None, None, None, 25],
 [None, 0.81, 0.5, 0.05, None],
 [None, 0.5, 0.3, 0.6, None],
 [0, 0, 0, 0, 0]]

In [23]:
# Each inner list becomes one row (labelled by property name); the columns
# are labelled with the resistance names.
resistance_DF = pd.DataFrame(
    data=resistance_listOfList,
    index=["type", "h", "k", "L", "R"],
    columns=resistance_name,
)
resistance_DF

Unnamed: 0,R1,R2,R3,R4,R5
type,conv,cond,cond,cond,conv
h,10,,,,25
k,,0.81,0.5,0.05,
L,,0.5,0.3,0.6,
R,0,0,0,0,0


I do not like the table like this. I would prefer the resistances to be the rows.
We need to TRANSPOSE the matrix (swap the rows and the columns):

In [24]:
# Build the transposed frame directly from the source lists instead of
# transposing resistance_DF onto itself. A self-referential
# `resistance_DF = resistance_DF.transpose()` flips the table back to the
# property-per-row layout every second time the cell is run; rebuilding from
# the lists makes this cell give the same result however often it is re-run.
resistance_DF = pd.DataFrame(
    resistance_listOfList,
    index=["type", "h", "k", "L", "R"],
    columns=resistance_name,
).transpose()
# print() shows the plain-text representation of the frame.
print(resistance_DF)

    type     h     k     L  R
R1  conv    10  None  None  0
R2  cond  None  0.81   0.5  0
R3  cond  None   0.5   0.3  0
R4  cond  None  0.05   0.6  0
R5  conv    25  None  None  0


There is a difference between showing a DataFrame with **print(DataFrame)** and evaluating the **DataFrame** itself:

In [27]:
# A bare expression on the last line of a cell is shown with the notebook's
# rich table display rather than the plain-text form that print() produces.
resistance_DF

Unnamed: 0,type,h,k,L,R
R1,conv,10.0,,,0
R2,cond,,0.81,0.5,0
R3,cond,,0.5,0.3,0
R4,cond,,0.05,0.6,0
R5,conv,25.0,,,0


How to extract items from the DataFrame:

In [25]:
# .loc selects by label: row "R1", all columns (":"). The result is a Series
# whose index is the column names.
resistance_DF.loc["R1", :]

type    conv
h         10
k       None
L       None
R          0
Name: R1, dtype: object

Do calculations:

In [26]:
# Compare every entry of the "type" column with "cond"; the result is a
# boolean Series (one True/False per resistance) that can be used as a row mask.
resistance_DF.loc[:,"type"]=="cond"

R1    False
R2     True
R3     True
R4     True
R5    False
Name: type, dtype: bool