# Creating a Pandas Series with a List

In [1]:
import pandas as pd

In [2]:
pd.Series([1, 2, 3, 4, 5])

0    1
1    2
2    3
3    4
4    5
dtype: int64

In [3]:
list_data = [1, 2, 3, 4, 5]
list_index = ["a", "b", "c", "d", "e"]

In [4]:
variable = pd.Series(data=list_data, index=list_index, dtype="float")

In [5]:
variable

a    1.0
b    2.0
c    3.0
d    4.0
e    5.0
dtype: float64

In [6]:
type(variable)

pandas.core.series.Series

In [7]:
list_data = [1, 2, 3, 4, 5,6]
list_index = ["a", "b", "c", "d", "e"]

In [8]:
variable = pd.Series(data=list_data, index=list_index, dtype="float")

ValueError: Length of values (6) does not match length of index (5)

# Creating a Pandas Series with a Dictionary

In [9]:
dictionary = {"ferrari" : 334.7, "porche" : 337.9, "lamborghini" : 349}

In [10]:
pd.Series(dictionary)

ferrari        334.7
porche         337.9
lamborghini    349.0
dtype: float64

In [11]:
pd.Series(data = dictionary)

ferrari        334.7
porche         337.9
lamborghini    349.0
dtype: float64

In [12]:
variable = pd.Series(data = dictionary, index = ["ferrari", "bugatti", "lamborghini"])

In [13]:
variable

ferrari        334.7
bugatti          NaN
lamborghini    349.0
dtype: float64

# Creating Pandas Series with NumPy Array

In [14]:
import numpy as np

In [15]:
example = np.array([1,3.3,5,7.2,9])
labels = np.array(["a", "b", "c", "d", "e"])

In [16]:
variable = pd.Series(data = example, index = labels)

In [17]:
variable

a    1.0
b    3.3
c    5.0
d    7.2
e    9.0
dtype: float64

# Object Types in Series

In [18]:
pd.Series(["world", 100, 5.8, True])

0    world
1      100
2      5.8
3     True
dtype: object

In [19]:
example = pd.Series(["world", 100, 5.8, True])

In [20]:
example

0    world
1      100
2      5.8
3     True
dtype: object

In [21]:
example[0]

'world'

In [22]:
print(type(example[0]))

<class 'str'>


In [23]:
print(type(example[1]))
print(type(example[2]))
print(type(example[3]))

<class 'int'>
<class 'float'>
<class 'bool'>


In [24]:
pd.Series([sum, type, max])

0    <built-in function sum>
1             <class 'type'>
2    <built-in function max>
dtype: object

# Examining the Primary Features of the Pandas Series

In [25]:
example = pd.Series([10, 20, 30, 40, 50, 60, 70])

In [26]:
example

0    10
1    20
2    30
3    40
4    50
5    60
6    70
dtype: int64

In [27]:
example.axes

[RangeIndex(start=0, stop=7, step=1)]

In [28]:
example.dtype

dtype('int64')

In [29]:
example.size

7

In [30]:
example.ndim

1

In [31]:
example.head()

0    10
1    20
2    30
3    40
4    50
dtype: int64

In [32]:
example.head(3)

0    10
1    20
2    30
dtype: int64

In [33]:
example.tail()

2    30
3    40
4    50
5    60
6    70
dtype: int64

# Most Applied Methods on Pandas Series

In [34]:
variable = pd.Series([8, 3, 6, 5, 1], index = ["b", "d", "c", "a", "e"])

In [35]:
variable

b    8
d    3
c    6
a    5
e    1
dtype: int64

In [36]:
new_variable = variable.sort_index()

In [37]:
new_variable

a    5
b    8
c    6
d    3
e    1
dtype: int64

In [38]:
new_variable2 = variable.sort_values()

In [39]:
new_variable2

e    1
d    3
a    5
c    6
b    8
dtype: int64

In [40]:
variable.isin([6, 1])

b    False
d    False
c     True
a    False
e     True
dtype: bool

In [41]:
variable[variable.isin([6, 1])]

c    6
e    1
dtype: int64

In [42]:
variable.values

array([8, 3, 6, 5, 1], dtype=int64)

In [43]:
[i for i in variable.values]

[8, 3, 6, 5, 1]

In [44]:
variable.index

Index(['b', 'd', 'c', 'a', 'e'], dtype='object')

In [45]:
[i for i in variable.index]

['b', 'd', 'c', 'a', 'e']

In [46]:
variable.items

<bound method Series.items of b    8
d    3
c    6
a    5
e    1
dtype: int64>

In [47]:
variable.items()

<zip at 0x215993df840>

In [48]:
list(variable.items())

[('b', 8), ('d', 3), ('c', 6), ('a', 5), ('e', 1)]

In [49]:
for index, value in variable.items():
    print(index,'-', value)

b - 8
d - 3
c - 6
a - 5
e - 1


# Indexing and Slicing Pandas Series

In [50]:
example = pd.Series(["+90", "+49", "+33", "+39", "+46", "+47"], index = ["Turkey", "Germany", "France", "Italy", "Sweden", "Norway"]

In [51]:
example

Turkey     +90
Germany    +49
France     +33
Italy      +39
Sweden     +46
Norway     +47
dtype: object

In [52]:
example["Germany"]

'+49'

In [54]:
example[1]

'+49'

In [55]:
example["France":"Sweden"]

France    +33
Italy     +39
Sweden    +46
dtype: object

In [56]:
example[2:5]

France    +33
Italy     +39
Sweden    +46
dtype: object

In [57]:
example[2:5:2]

France    +33
Sweden    +46
dtype: object

In [58]:
example[["Norway", "Turkey"]]

Norway    +47
Turkey    +90
dtype: object

# Creating Pandas DataFrame with List

In [59]:
list_example = [5, 10, 15, 20, 25]

In [60]:
list_example

[5, 10, 15, 20, 25]

In [61]:
pd.DataFrame(list_example, columns = ["values"], dtype = "float")

Unnamed: 0,values
0,5.0
1,10.0
2,15.0
3,20.0
4,25.0


# Creating Pandas DataFrame with NumPy Array

In [62]:
array_example = np.random.randint(0,50,(4,4))

In [63]:
array_example

array([[ 7, 22, 46, 42],
       [47, 48, 13, 35],
       [34, 24, 30,  2],
       [33, 47, 16, 11]])

In [65]:
df = pd.DataFrame(array_example, index = ["a", "b", "c", "d"], columns = ["val1", "val2", "val3", "val4"])

In [66]:
df

Unnamed: 0,val1,val2,val3,val4
a,7,22,46,42
b,47,48,13,35
c,34,24,30,2
d,33,47,16,11


# Creating Pandas DataFrame with Dictionary

In [67]:
dp = np.random.randint(10,50, size=6)
dp

array([12, 43, 27, 46, 41, 45])

In [68]:
x = np.random.randint(10,50, size=6)
y = np.random.randint(10,50, size=6)
z = np.random.randint(10,50, size=6)

In [69]:
dict_example = {"val1": x, 'val2': y, "val3": z, "val4": dp}

In [70]:
dict_example

{'val1': array([26, 45, 26, 48, 41, 31]),
 'val2': array([39, 10, 49, 16, 33, 16]),
 'val3': array([15, 19, 17, 22, 35, 25]),
 'val4': array([12, 43, 27, 46, 41, 45])}

In [72]:
pd.DataFrame(dict_example, index = ["a", "b", "c", "d", "e", "f"])

Unnamed: 0,val1,val2,val3,val4
a,26,39,15,12
b,45,10,19,43
c,26,49,17,27
d,48,16,22,46
e,41,33,35,41
f,31,16,25,45


# Examining the Properties of Pandas DataFrames

In [73]:
x = np.random.randint(10, 50, size = 6)
y = np.random.randint(10, 50, size = 6)
z = np.random.randint(10, 50, size = 6)
t = np.random.randint(10, 50, size = 6)

dict_example = {"val1":x, "val2":y, "val3":z, "val4":t}

df = pd.DataFrame(dict_example)

In [74]:
df

Unnamed: 0,val1,val2,val3,val4
0,43,47,38,19
1,10,28,13,18
2,10,49,30,23
3,29,10,25,45
4,23,21,37,11
5,11,34,21,14


In [75]:
df.head()

Unnamed: 0,val1,val2,val3,val4
0,43,47,38,19
1,10,28,13,18
2,10,49,30,23
3,29,10,25,45
4,23,21,37,11


In [76]:
df.head(2)

Unnamed: 0,val1,val2,val3,val4
0,43,47,38,19
1,10,28,13,18


In [77]:
df.tail()

Unnamed: 0,val1,val2,val3,val4
1,10,28,13,18
2,10,49,30,23
3,29,10,25,45
4,23,21,37,11
5,11,34,21,14


In [78]:
df.columns

Index(['val1', 'val2', 'val3', 'val4'], dtype='object')

In [79]:
df.values

array([[43, 47, 38, 19],
       [10, 28, 13, 18],
       [10, 49, 30, 23],
       [29, 10, 25, 45],
       [23, 21, 37, 11],
       [11, 34, 21, 14]])

In [80]:
[i for i in df.columns]

['val1', 'val2', 'val3', 'val4']

In [81]:
df.columns = ["new1", "new2", "new3", "new4"]

In [82]:
df

Unnamed: 0,new1,new2,new3,new4
0,43,47,38,19
1,10,28,13,18
2,10,49,30,23
3,29,10,25,45
4,23,21,37,11
5,11,34,21,14


In [84]:
type(df.values)

numpy.ndarray

In [85]:
type(df)

pandas.core.frame.DataFrame

In [86]:
df.shape

(6, 4)

In [87]:
df.ndim

2

In [88]:
df.size

24

# Element Selection Operations in Pandas DataFrames: Lesson 1

In [1]:
import numpy as np
import pandas as pd

In [2]:
np.random.seed(101)
df = pd.DataFrame(data = np.random.randn(6,5),index = "A B C D E F".split(), columns = ["val1 val2 val3 val4 val5".split()])

In [3]:
df

Unnamed: 0,val1,val2,val3,val4,val5
A,2.70685,0.628133,0.907969,0.503826,0.651118
B,-0.319318,-0.848077,0.605965,-2.018168,0.740122
C,0.528813,-0.589001,0.188695,-0.758872,-0.933237
D,0.955057,0.190794,1.978757,2.605967,0.683509
E,0.302665,1.693723,-1.706086,-1.159119,-0.134841
F,0.390528,0.166905,0.184502,0.807706,0.07296


In [4]:
df['val1']

Unnamed: 0,val1
A,2.70685
B,-0.319318
C,0.528813
D,0.955057
E,0.302665
F,0.390528


In [5]:
df.val2

Unnamed: 0,val2
A,0.628133
B,-0.848077
C,-0.589001
D,0.190794
E,1.693723
F,0.166905


In [6]:
df.val3.values

array([[ 0.90796945],
       [ 0.60596535],
       [ 0.18869531],
       [ 1.97875732],
       [-1.70608593],
       [ 0.18450186]])

In [7]:
df.val4.val5

AttributeError: 'DataFrame' object has no attribute 'val5'

In [9]:
df[["val1"]]

Unnamed: 0,val1
A,2.70685
B,-0.319318
C,0.528813
D,0.955057
E,0.302665
F,0.390528


In [11]:
df[["val1"]]["B":"D"]

Unnamed: 0,val1
B,-0.319318
C,0.528813
D,0.955057


In [13]:
df[["val1"]][1:4]

Unnamed: 0,val1
B,-0.319318
C,0.528813
D,0.955057


# Element Selection Operations in Pandas DataFrames: Lesson 2

In [14]:
df[["val1", "val2"]]

Unnamed: 0,val1,val2
A,2.70685,0.628133
B,-0.319318,-0.848077
C,0.528813,-0.589001
D,0.955057,0.190794
E,0.302665,1.693723
F,0.390528,0.166905


In [15]:
df[["val1", "val2"]][2:4]

Unnamed: 0,val1,val2
C,0.528813,-0.589001
D,0.955057,0.190794


In [16]:
variable = ["val3", "val4"]

In [18]:
df[variable]

Unnamed: 0,val3,val4
A,0.907969,0.503826
B,0.605965,-2.018168
C,0.188695,-0.758872
D,1.978757,2.605967
E,-1.706086,-1.159119
F,0.184502,0.807706


In [19]:
df["B":"D"]

Unnamed: 0,val1,val2,val3,val4,val5
B,-0.319318,-0.848077,0.605965,-2.018168,0.740122
C,0.528813,-0.589001,0.188695,-0.758872,-0.933237
D,0.955057,0.190794,1.978757,2.605967,0.683509


In [20]:
df[1:4]

Unnamed: 0,val1,val2,val3,val4,val5
B,-0.319318,-0.848077,0.605965,-2.018168,0.740122
C,0.528813,-0.589001,0.188695,-0.758872,-0.933237
D,0.955057,0.190794,1.978757,2.605967,0.683509


In [21]:
df["E"]

KeyError: 'E'

In [22]:
df["E":"E"]

Unnamed: 0,val1,val2,val3,val4,val5
E,0.302665,1.693723,-1.706086,-1.159119,-0.134841


In [23]:
df["E":"E"][["val2","val5"]]

Unnamed: 0,val2,val5
E,1.693723,-0.134841


# Top Level Element Selection in Pandas DataFrames: Structure of loc and iloc: Lesson 1

In [24]:
np.random.seed(101)

array = np.random.randint(10, 50, (10, 5))
df = pd.DataFrame(data = array , index = "A B C D E F G H I J".split(), columns = "VAL1 VAL2 VAL3 VAL4 VAL5".split())

In [25]:
df

Unnamed: 0,VAL1,VAL2,VAL3,VAL4,VAL5
A,41,21,27,16,33
B,21,19,23,14,38
C,10,15,22,39,29
D,18,39,44,18,29
E,20,22,41,33,10
F,19,18,46,29,45
G,38,17,20,49,48
H,19,28,17,49,25
I,10,22,27,21,25
J,43,39,34,46,29


In [26]:
df.loc["A"]

VAL1    41
VAL2    21
VAL3    27
VAL4    16
VAL5    33
Name: A, dtype: int32

In [29]:
df.loc["A":"D"]

Unnamed: 0,VAL1,VAL2,VAL3,VAL4,VAL5
A,41,21,27,16,33
B,21,19,23,14,38
C,10,15,22,39,29
D,18,39,44,18,29


In [30]:
df.iloc[0:5]

Unnamed: 0,VAL1,VAL2,VAL3,VAL4,VAL5
A,41,21,27,16,33
B,21,19,23,14,38
C,10,15,22,39,29
D,18,39,44,18,29
E,20,22,41,33,10


In [31]:
df

Unnamed: 0,VAL1,VAL2,VAL3,VAL4,VAL5
A,41,21,27,16,33
B,21,19,23,14,38
C,10,15,22,39,29
D,18,39,44,18,29
E,20,22,41,33,10
F,19,18,46,29,45
G,38,17,20,49,48
H,19,28,17,49,25
I,10,22,27,21,25
J,43,39,34,46,29


In [33]:
df.loc['C', 'VAL4']

39

In [34]:
df.iloc[2,3]

39

# Top Level Element Selection in Pandas DataFrames: Structure of loc and iloc: Lesson 2

In [35]:
np.random.seed(101)

array = np.random.randint(10, 50, (10, 5))
df = pd.DataFrame(data = array , index = "A B C D E F G H I J".split(), columns = "VAL1 VAL2 VAL3 VAL4 VAL5".split())

In [36]:
df

Unnamed: 0,VAL1,VAL2,VAL3,VAL4,VAL5
A,41,21,27,16,33
B,21,19,23,14,38
C,10,15,22,39,29
D,18,39,44,18,29
E,20,22,41,33,10
F,19,18,46,29,45
G,38,17,20,49,48
H,19,28,17,49,25
I,10,22,27,21,25
J,43,39,34,46,29


In [37]:
df.loc['C':'H', 'VAL4']

C    39
D    18
E    33
F    29
G    49
H    49
Name: VAL4, dtype: int32

In [38]:
df.loc['C':'H', ['VAL4']]

Unnamed: 0,VAL4
C,39
D,18
E,33
F,29
G,49
H,49


In [39]:
df.loc["C":"G"][["VAL3"]]

Unnamed: 0,VAL3
C,22
D,44
E,41
F,46
G,20


In [40]:
df.iloc[2:7, 2]

C    22
D    44
E    41
F    46
G    20
Name: VAL3, dtype: int32

In [41]:
df.iloc[2:7, [2]]

Unnamed: 0,VAL3
C,22
D,44
E,41
F,46
G,20


In [42]:
df.iloc[2:7][["VAL3"]]

Unnamed: 0,VAL3
C,22
D,44
E,41
F,46
G,20


# Top Level Element Selection in Pandas DataFrames: Structure of loc and iloc: Lesson 3

In [44]:
df.loc["C":"H","VAL2":"VAL4"]

Unnamed: 0,VAL2,VAL3,VAL4
C,15,22,39
D,39,44,18
E,22,41,33
F,18,46,29
G,17,20,49
H,28,17,49


In [45]:
df.iloc[2:8,1:4]

Unnamed: 0,VAL2,VAL3,VAL4
C,15,22,39
D,39,44,18
E,22,41,33
F,18,46,29
G,17,20,49
H,28,17,49


In [47]:
df.iloc[2:8,1:4].loc["E":"F", ["VAL3"]]

Unnamed: 0,VAL3
E,41
F,46


# Element Selection with Conditional Operations in Pandas Data Frames

In [48]:
df > 20

Unnamed: 0,VAL1,VAL2,VAL3,VAL4,VAL5
A,True,True,True,False,True
B,True,False,True,False,True
C,False,False,True,True,True
D,False,True,True,False,True
E,False,True,True,True,False
F,False,False,True,True,True
G,True,False,False,True,True
H,False,True,False,True,True
I,False,True,True,True,True
J,True,True,True,True,True


In [49]:
df[df>20]

Unnamed: 0,VAL1,VAL2,VAL3,VAL4,VAL5
A,41.0,21.0,27.0,,33.0
B,21.0,,23.0,,38.0
C,,,22.0,39.0,29.0
D,,39.0,44.0,,29.0
E,,22.0,41.0,33.0,
F,,,46.0,29.0,45.0
G,38.0,,,49.0,48.0
H,,28.0,,49.0,25.0
I,,22.0,27.0,21.0,25.0
J,43.0,39.0,34.0,46.0,29.0


In [50]:
df['VAL1']>20

A     True
B     True
C    False
D    False
E    False
F    False
G     True
H    False
I    False
J     True
Name: VAL1, dtype: bool

In [51]:
df[df['VAL1']>20]

Unnamed: 0,VAL1,VAL2,VAL3,VAL4,VAL5
A,41,21,27,16,33
B,21,19,23,14,38
G,38,17,20,49,48
J,43,39,34,46,29


In [52]:
df[df['VAL1']<20]

Unnamed: 0,VAL1,VAL2,VAL3,VAL4,VAL5
C,10,15,22,39,29
D,18,39,44,18,29
F,19,18,46,29,45
H,19,28,17,49,25
I,10,22,27,21,25


In [53]:
df[df["VAL1"] < 20]["VAL2"]

C    15
D    39
F    18
H    28
I    22
Name: VAL2, dtype: int32

In [54]:
df[df["VAL1"] < 20][["VAL2"]]

Unnamed: 0,VAL2
C,15
D,39
F,18
H,28
I,22


In [55]:
df[df["VAL1"] < 20][["VAL2",'VAL5']]

Unnamed: 0,VAL2,VAL5
C,15,29
D,39,29
F,18,45
H,28,25
I,22,25


In [56]:
df

Unnamed: 0,VAL1,VAL2,VAL3,VAL4,VAL5
A,41,21,27,16,33
B,21,19,23,14,38
C,10,15,22,39,29
D,18,39,44,18,29
E,20,22,41,33,10
F,19,18,46,29,45
G,38,17,20,49,48
H,19,28,17,49,25
I,10,22,27,21,25
J,43,39,34,46,29


In [57]:
df[('VAL1 > 20')& ('VAL2'<18)]

TypeError: '<' not supported between instances of 'str' and 'int'

In [58]:
df[(df["VAL1"] > 20) & (df["VAL4"] < 18)]

Unnamed: 0,VAL1,VAL2,VAL3,VAL4,VAL5
A,41,21,27,16,33
B,21,19,23,14,38


In [60]:
df[(df < 35)| (df['VAL5'] > 20)]

Unnamed: 0,VAL1,VAL2,VAL3,VAL4,VAL5
A,,21.0,27.0,16.0,33.0
B,21.0,19.0,23.0,14.0,
C,10.0,15.0,22.0,,29.0
D,18.0,,,18.0,29.0
E,20.0,22.0,,33.0,10.0
F,19.0,18.0,,29.0,
G,,17.0,20.0,,
H,19.0,28.0,17.0,,25.0
I,10.0,22.0,27.0,21.0,25.0
J,,,34.0,,29.0


In [61]:
df.loc[df.VAL2 > 25, ["VAL2", "VAL3", "VAL5"]]

Unnamed: 0,VAL2,VAL3,VAL5
D,39,44,29
H,28,17,25
J,39,34,29
