# CREATING A SERIES

In [1]:
import pandas as pd
# Build a Series from a plain Python list; with no index given, pandas
# assigns default integer labels starting at 0.
my_list = [10, 20, 30, 40, 50]
my_series = pd.Series(data=my_list)
print(my_series)

0    10
1    20
2    30
3    40
4    50
dtype: int64


# SETTING INDEX LABELS


In [2]:
# Rebuild the Series with explicit string labels 'a'..'e' instead of 0..4.
my_series = pd.Series(data=my_list, index=list('abcde'))
print(my_series)

a    10
b    20
c    30
d    40
e    50
dtype: int64


# EXTRACTING COMPONENTS

In [3]:
# The index component: an Index object holding the labels of the Series.
my_series.index

Index(['a', 'b', 'c', 'd', 'e'], dtype='object')

In [4]:
# The values component: the underlying data as a NumPy array.
my_series.values

array([10, 20, 30, 40, 50], dtype=int64)

# DICTIONARY TO SERIES

In [5]:
# City -> population mapping; the dict keys become the Series index labels
# and the dict values become the Series data.
population_dict = {
    'Amsterdam': 821752,
    'Rotterdam': 623652,
    'The Hague': 514861,
    'Utrecht': 345043,
    'Eindhoven': 223027,
}
population = pd.Series(data=population_dict)
print(population)

Amsterdam    821752
Rotterdam    623652
The Hague    514861
Utrecht      345043
Eindhoven    223027
dtype: int64


In [6]:
# Index labels taken from the dictionary keys.
population.index

Index(['Amsterdam', 'Rotterdam', 'The Hague', 'Utrecht', 'Eindhoven'], dtype='object')

In [7]:
# Data taken from the dictionary values, returned as a NumPy array.
population.values

array([821752, 623652, 514861, 345043, 223027], dtype=int64)

# NAME ATTRIBUTE


In [8]:
# Show the Series before any names are assigned (no "Name:" in the footer yet).
population

Amsterdam    821752
Rotterdam    623652
The Hague    514861
Utrecht      345043
Eindhoven    223027
dtype: int64

In [9]:
# Name the values and the index; both names then appear in the repr
# (index name as a header line, Series name in the footer).
population.name = 'Population'
population.index.name = 'city'
population

city
Amsterdam    821752
Rotterdam    623652
The Hague    514861
Utrecht      345043
Eindhoven    223027
Name: Population, dtype: int64

# ACCESSING ELEMENTS

In [10]:
# Label-based access: look up single values by their index label.
print(population['Amsterdam'])
print(population['Rotterdam'])

821752
623652


In [11]:
# Position-based access. Use .iloc explicitly: plain population[0] on a
# string-labelled Series relies on the integer-as-position fallback, which
# pandas has deprecated (integer keys will be treated as labels).
print(population.iloc[0])
print(population.iloc[1])

821752
623652


# SLICING A SERIES

In [12]:
# Show the full Series as a reference for the slices below.
population

city
Amsterdam    821752
Rotterdam    623652
The Hague    514861
Utrecht      345043
Eindhoven    223027
Name: Population, dtype: int64

In [13]:
# Label-based slicing: unlike positional slices, the end label ('Utrecht') is included.
print(population['Amsterdam':'Utrecht'])

city
Amsterdam    821752
Rotterdam    623652
The Hague    514861
Utrecht      345043
Name: Population, dtype: int64


In [14]:
# Position-based slicing (end position excluded). .iloc makes it explicit that
# 0 and 2 are positions, not labels, in contrast to the label slice above.
print(population.iloc[0:2])

city
Amsterdam    821752
Rotterdam    623652
Name: Population, dtype: int64


# ARITHMETIC OPERATIONS

In [15]:
# Scalar arithmetic is applied element-wise; the result is a new float64 Series.
population/1000

city
Amsterdam    821.752
Rotterdam    623.652
The Hague    514.861
Utrecht      345.043
Eindhoven    223.027
Name: Population, dtype: float64

In [16]:
# Two Series whose labels only partly overlap: 'c' exists only in series1
# and 'f' only in series2.
series1 = pd.Series(data=[1, 2, 3, 4, 5], index=list('abcde'))
series2 = pd.Series(data=[6, 7, 8, 9, 10], index=list('abdef'))
# Addition aligns on labels; labels missing from either side produce NaN.
print(series1.add(series2))

a     7.0
b     9.0
c     NaN
d    12.0
e    14.0
f     NaN
dtype: float64


# BOOLEAN OPERATIONS

In [17]:
# Element-wise comparison: yields a boolean Series with the same index.
print(population > 500000)

city
Amsterdam     True
Rotterdam     True
The Hague     True
Utrecht      False
Eindhoven    False
Name: Population, dtype: bool


# FILTERING A SERIES

Filter a Series by using a boolean Series as a mask: only the entries where the mask is `True` are kept.

In [18]:
# Use the boolean Series as a mask: keeps only the rows where the condition is True.
print(population[population > 500000])


city
Amsterdam    821752
Rotterdam    623652
The Hague    514861
Name: Population, dtype: int64


# SERIES METHODS


In [19]:
# Summary statistics (count, mean, std, min, quartiles, max) for the numeric Series.
population.describe()

count         5.000000
mean     505667.000000
std      234307.573534
min      223027.000000
25%      345043.000000
50%      514861.000000
75%      623652.000000
max      821752.000000
Name: Population, dtype: float64

In [20]:
# A Series of university names containing deliberate duplicates, used below
# to demonstrate unique(), nunique() and value_counts().
universities = pd.Series(
    data=[
        'Utrecht University',
        'Leiden University',
        'Utrecht University',
        'Radboud University Nijmegen',
        'Leiden University',
        'University of Groningen',
        'Tilburg University',
    ]
)

In [21]:
# Display the Series, including the repeated entries.
universities

0             Utrecht University
1              Leiden University
2             Utrecht University
3    Radboud University Nijmegen
4              Leiden University
5        University of Groningen
6             Tilburg University
dtype: object

In [22]:
# Distinct values as an array, in order of first appearance.
universities.unique()

array(['Utrecht University', 'Leiden University',
       'Radboud University Nijmegen', 'University of Groningen',
       'Tilburg University'], dtype=object)

In [23]:
# Number of distinct values.
universities.nunique()

5

# VALUE COUNTS

In [24]:
import numpy as np

In [25]:
# Convert the Series to a NumPy array with the pandas-native accessor instead
# of round-tripping through np.array(). Only .size and .shape are read from it below.
array_universities = universities.to_numpy()

In [26]:
# Total number of elements, duplicates included.
array_universities.size

7

In [27]:
# Shape tuple of the array; a single entry means it is one-dimensional.
array_universities.shape

(7,)

In [28]:
# Frequency of each distinct value, most frequent first.
print(universities.value_counts())

Utrecht University             2
Leiden University              2
Radboud University Nijmegen    1
University of Groningen        1
Tilburg University             1
dtype: int64


# ASCENDING SORT & DESCENDING SORT


In [29]:
# City -> area mapping, deliberately unsorted so the sorting examples
# below have something to do. Utrecht and Eindhoven share the same value.
area = pd.Series(
    {
        'Rotterdam': 41.4,
        'Utrecht': 30.5,
        'Amsterdam': 219.3,
        'Eindhoven': 30.5,
        'The Hague': 31.4,
    }
)

In [30]:
# Sort by value, ascending by default. Returns a new Series; `area` itself is not modified.
area.sort_values()

Utrecht       30.5
Eindhoven     30.5
The Hague     31.4
Rotterdam     41.4
Amsterdam    219.3
dtype: float64

In [31]:
# Sort by value, largest first. Still returns a new Series.
area.sort_values(ascending=False)

Amsterdam    219.3
Rotterdam     41.4
The Hague     31.4
Utrecht       30.5
Eindhoven     30.5
dtype: float64

# INPLACE ARGUMENT


In [32]:
# inplace=True sorts `area` itself and returns None, so this cell displays nothing.
# From here on `area` is ordered from largest to smallest value.
area.sort_values(ascending=False, inplace=True)


In [33]:
# Confirm that the in-place sort changed the stored order of `area`.
area

Amsterdam    219.3
Rotterdam     41.4
The Hague     31.4
Utrecht       30.5
Eindhoven     30.5
dtype: float64

# SORTING BY INDEX

In [34]:
# Sort by index label (alphabetical for string labels) instead of by value.
# Returns a new Series.
area.sort_index()

Amsterdam    219.3
Eindhoven     30.5
Rotterdam     41.4
The Hague     31.4
Utrecht       30.5
dtype: float64

# NAN VALUES

In [35]:
# Boolean Series marking which entries are missing (NaN).
print(area.isnull())


Amsterdam    False
Rotterdam    False
The Hague    False
Utrecht      False
Eindhoven    False
dtype: bool


In [36]:
# Summing the boolean mask counts the missing entries (True counts as 1).
print(area.isnull().sum())


0


# AGGREGATION


In [37]:
# Show the data that the aggregations below operate on.
area

Amsterdam    219.3
Rotterdam     41.4
The Hague     31.4
Utrecht       30.5
Eindhoven     30.5
dtype: float64

In [38]:
# Total of all values. The trailing digits in the result are ordinary
# floating-point rounding, not a data problem.
area.sum()

353.09999999999997

In [39]:
# Arithmetic mean of the values.
area.mean()

70.61999999999999

In [40]:
# Standard deviation; pandas uses the sample formula (ddof=1) by default.
area.std()

83.24209872414318

# DATAFRAME

In [41]:
# Column-oriented input: each key is a column name and each list holds that
# column's values, one entry per game.
data = {
    'Name': ['Valheim', 'Among Us', 'Minecraft', 'Fortnite', 'Genshin Impact'],
    'Year': [2021, 2018, 2011, 2017, 2020],
    'Genre': ['Survival', 'Social Deduction', 'Sandbox', 'Battle Royale', 'Action RPG'],
    'Developer': ['Iron Gate Studio', 'InnerSloth', 'Mojang', 'Epic Games', 'miHoYo'],
    'Price': [19.99, 4.99, 26.95, 0.00, 0.00],
}

In [42]:
# Build a DataFrame from the dict of lists: keys become the column names.
df = pd.DataFrame(data)

In [43]:
# Display the frame; rows get a default 0..4 integer index.
df

Unnamed: 0,Name,Year,Genre,Developer,Price
0,Valheim,2021,Survival,Iron Gate Studio,19.99
1,Among Us,2018,Social Deduction,InnerSloth,4.99
2,Minecraft,2011,Sandbox,Mojang,26.95
3,Fortnite,2017,Battle Royale,Epic Games,0.0
4,Genshin Impact,2020,Action RPG,miHoYo,0.0


In [44]:
# Row-oriented input: one dict per game, with the dict keys acting as column names.
# Note that this rebinds `data`, which previously held a dict of lists.
data = [
    {'Name': 'Valheim', 'Year': 2021, 'Genre': 'Survival', 'Developer': 'Iron Gate Studio', 'Price': 19.99},
    {'Name': 'Among Us', 'Year': 2018, 'Genre': 'Social Deduction', 'Developer': 'InnerSloth', 'Price': 4.99},
    {'Name': 'Minecraft', 'Year': 2011, 'Genre': 'Sandbox', 'Developer': 'Mojang', 'Price': 26.95},
    {'Name': 'Fortnite', 'Year': 2017, 'Genre': 'Battle Royale', 'Developer': 'Epic Games', 'Price': 0.00},
    {'Name': 'Genshin Impact', 'Year': 2020, 'Genre': 'Action RPG', 'Developer': 'miHoYo', 'Price': 0.00},
]


In [45]:
# Build the same DataFrame from a list of dicts: each dict becomes one row.
df = pd.DataFrame(data)
df

Unnamed: 0,Name,Year,Genre,Developer,Price
0,Valheim,2021,Survival,Iron Gate Studio,19.99
1,Among Us,2018,Social Deduction,InnerSloth,4.99
2,Minecraft,2011,Sandbox,Mojang,26.95
3,Fortnite,2017,Battle Royale,Epic Games,0.0
4,Genshin Impact,2020,Action RPG,miHoYo,0.0


In [46]:
# Row-oriented input without column names: one inner list per game, in the
# order name, year, genre, developer, price. Column names are supplied when
# the DataFrame is constructed.
data = [
    ['Valheim', 2021, 'Survival', 'Iron Gate Studio', 19.99],
    ['Among Us', 2018, 'Social Deduction', 'InnerSloth', 4.99],
    ['Minecraft', 2011, 'Sandbox', 'Mojang', 26.95],
    ['Fortnite', 2017, 'Battle Royale', 'Epic Games', 0.00],
    ['Genshin Impact', 2020, 'Action RPG', 'miHoYo', 0.00],
]

In [47]:
# Build the DataFrame from a list of lists; the inner lists carry no names,
# so the column labels are passed explicitly.
df = pd.DataFrame(data, columns=['Name', 'Year', 'Genre', 'Developer', 'Price'])


In [48]:
# First three rows of the frame.
df.head(3)

Unnamed: 0,Name,Year,Genre,Developer,Price
0,Valheim,2021,Survival,Iron Gate Studio,19.99
1,Among Us,2018,Social Deduction,InnerSloth,4.99
2,Minecraft,2011,Sandbox,Mojang,26.95


In [49]:
# Two randomly chosen rows. A fixed random_state makes the selection
# reproducible across Restart & Run All; without it, every run shows
# different rows. The seed value itself is arbitrary.
df.sample(2, random_state=42)

Unnamed: 0,Name,Year,Genre,Developer,Price
1,Among Us,2018,Social Deduction,InnerSloth,4.99
3,Fortnite,2017,Battle Royale,Epic Games,0.0


In [50]:
# Column labels as an Index object.
df.columns

Index(['Name', 'Year', 'Genre', 'Developer', 'Price'], dtype='object')

In [51]:
# The same column labels as a plain Python list.
df.columns.tolist()

['Name', 'Year', 'Genre', 'Developer', 'Price']

In [52]:
# Rename every column via an old-name -> new-name mapping. Reassigning the
# result (instead of inplace=True) keeps the step explicit and chainable;
# `df` ends up bound to the renamed frame either way.
df = df.rename(
    columns={
        'Name': 'Game',
        'Year': 'Release Year',
        'Genre': 'Game Genre',
        'Developer': 'Game Developer',
        'Price': 'Game Price',
    }
)

In [53]:
# Data type of each column (shown under the renamed labels).
df.dtypes


Game               object
Release Year        int64
Game Genre         object
Game Developer     object
Game Price        float64
dtype: object

In [54]:
# Two equivalent ways to select one column as a Series. Only the value of the
# last expression in a cell is rendered, so the result of the first line is
# discarded here. Attribute access (df.Game) only works for column names that
# are valid Python identifiers; bracket access works for any name.
df['Game']
df.Game

0           Valheim
1          Among Us
2         Minecraft
3          Fortnite
4    Genshin Impact
Name: Game, dtype: object

In [55]:
# Passing a list of column names selects several columns and returns a DataFrame.
df[['Game', 'Game Genre']]


Unnamed: 0,Game,Game Genre
0,Valheim,Survival
1,Among Us,Social Deduction
2,Minecraft,Sandbox
3,Fortnite,Battle Royale
4,Genshin Impact,Action RPG


In [56]:
# The row index; by default a RangeIndex counting from 0.
df.index

RangeIndex(start=0, stop=5, step=1)

In [57]:
# Promote the 'Game' column to the row index. Reassigning the result avoids
# inplace=True while leaving `df` bound to the re-indexed frame.
df = df.set_index('Game')

In [58]:
# Display the frame: 'Game' is now the index rather than a regular column.
df

Unnamed: 0_level_0,Release Year,Game Genre,Game Developer,Game Price
Game,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Valheim,2021,Survival,Iron Gate Studio,19.99
Among Us,2018,Social Deduction,InnerSloth,4.99
Minecraft,2011,Sandbox,Mojang,26.95
Fortnite,2017,Battle Royale,Epic Games,0.0
Genshin Impact,2020,Action RPG,miHoYo,0.0


In [59]:
# Restore the default integer index. drop=False moves the old index ('Game')
# back into the frame as a regular column instead of discarding it.
# Reassigning the result avoids inplace=True.
df = df.reset_index(drop=False)
df

Unnamed: 0,Game,Release Year,Game Genre,Game Developer,Game Price
0,Valheim,2021,Survival,Iron Gate Studio,19.99
1,Among Us,2018,Social Deduction,InnerSloth,4.99
2,Minecraft,2011,Sandbox,Mojang,26.95
3,Fortnite,2017,Battle Royale,Epic Games,0.0
4,Genshin Impact,2020,Action RPG,miHoYo,0.0


In [60]:
# Small frame whose integer index labels start at 1 rather than 0, so that
# label-based (.loc) and position-based (.iloc) access give visibly
# different results in the cells that follow.
df1 = pd.DataFrame(
    data=[[1, 2], [4, 5], [7, 8]],
    index=[1, 2, 3],
    columns=['col1', 'col2'],
)

In [61]:
# Display the frame; note that the index labels are 1, 2, 3.
df1

Unnamed: 0,col1,col2
1,1,2
2,4,5
3,7,8


In [62]:
# Position-based: the first row (position 0), which carries the label 1.
df1.iloc[0]

col1    1
col2    2
Name: 1, dtype: int64

In [63]:
# Position-based slice: the end position is excluded, so only the row at position 1 is returned.
df1.iloc[1:2]

Unnamed: 0,col1,col2
2,4,5


In [64]:
# Label-based slice: both end labels are included, so the rows labelled 1 and 2 are returned.
df1.loc[1:2]

Unnamed: 0,col1,col2
1,1,2
2,4,5


In [65]:
# Single cell by position: first row, first column.
df1.iloc[0, 0]

1

In [66]:
# The same cell addressed by labels: row label 1, column 'col1'.
df1.loc[1, 'col1']

1

In [67]:
# Overwrite a single cell by position; this modifies df1 itself.
df1.iloc[0, 0] = 10

In [68]:
# Overwrite several cells at once: rows labelled 2 and 3 of 'col2'. Modifies df1 itself.
df1.loc[[2, 3], 'col2'] = 100

In [69]:
# Replace every 100 with 200. The result is a new frame that is only displayed;
# it is not assigned, so df1 itself still holds 100.
df1.replace(100, 200)

Unnamed: 0,col1,col2
1,10,2
2,4,200
3,7,200


In [70]:
# Structural overview of the games frame: index range, columns, non-null counts,
# dtypes and memory usage.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5 entries, 0 to 4
Data columns (total 5 columns):
 #   Column          Non-Null Count  Dtype  
---  ------          --------------  -----  
 0   Game            5 non-null      object 
 1   Release Year    5 non-null      int64  
 2   Game Genre      5 non-null      object 
 3   Game Developer  5 non-null      object 
 4   Game Price      5 non-null      float64
dtypes: float64(1), int64(1), object(3)
memory usage: 332.0+ bytes


In [71]:
# Summary statistics; by default only the numeric columns are included.
df.describe()

Unnamed: 0,Release Year,Game Price
count,5.0,5.0
mean,2017.4,10.386
std,3.911521,12.363836
min,2011.0,0.0
25%,2017.0,0.0
50%,2018.0,4.99
75%,2020.0,19.99
max,2021.0,26.95


In [72]:
# Cast the price column from float64 to int64 and overwrite it.
# NOTE(review): the float -> int cast drops the fractional part
# (19.99 -> 19, 4.99 -> 4, 26.95 -> 26), so every later computation on
# 'Game Price' works on whole-dollar amounts. Confirm that losing the cents
# is intended before reusing this pattern.
df['Game Price'] = df['Game Price'].astype('int64')

In [73]:
def dollar_to_euro(price, rate=0.92):
    """Convert an amount in US dollars to euros.

    Parameters
    ----------
    price : number
        Amount in US dollars. Anything that supports multiplication by a
        float works.
    rate : float, default 0.92
        Euros per dollar. The default keeps the fixed rate this notebook
        originally hard-coded; pass a different value to use another rate.

    Returns
    -------
    The converted amount, ``price * rate``.
    """
    return price * rate
# Apply the conversion to each price and overwrite the column with the result.
# NOTE: because the column is overwritten, re-running this cell converts the
# already-converted values a second time.
df['Game Price'] = df['Game Price'].apply(dollar_to_euro)
df

Unnamed: 0,Game,Release Year,Game Genre,Game Developer,Game Price
0,Valheim,2021,Survival,Iron Gate Studio,17.48
1,Among Us,2018,Social Deduction,InnerSloth,3.68
2,Minecraft,2011,Sandbox,Mojang,23.92
3,Fortnite,2017,Battle Royale,Epic Games,0.0
4,Genshin Impact,2020,Action RPG,miHoYo,0.0


In [74]:
# Translate each genre into an integer code through a lookup dict.
# The result is only displayed; it is not stored back into df.
df['Game Genre'].map({'Action RPG': 0, 'Battle Royale': 1, 'Sandbox': 2, 'Survival': 3, 'Social Deduction': 4})

0    3
1    4
2    2
3    1
4    0
Name: Game Genre, dtype: int64

In [75]:
# Sort the rows by price, most expensive first. Returns a new frame; df keeps its order.
df.sort_values(by='Game Price', ascending=False)

Unnamed: 0,Game,Release Year,Game Genre,Game Developer,Game Price
2,Minecraft,2011,Sandbox,Mojang,23.92
0,Valheim,2021,Survival,Iron Gate Studio,17.48
1,Among Us,2018,Social Deduction,InnerSloth,3.68
3,Fortnite,2017,Battle Royale,Epic Games,0.0
4,Genshin Impact,2020,Action RPG,miHoYo,0.0
