# CREATING A SERIES

In [1]:
import pandas as pd
# Build a Series from a plain Python list; with no index given,
# pandas labels the entries 0, 1, 2, ...
my_list = [10, 20, 30, 40, 50]
my_series = pd.Series(data=my_list)
print(my_series)

0    10
1    20
2    30
3    40
4    50
dtype: int64


# SETTING INDEX LABELS


In [2]:
# Same values as before, now with explicit string labels 'a'..'e'
# instead of the default integer index.
my_series = pd.Series(data=my_list, index=list('abcde'))
print(my_series)

a    10
b    20
c    30
d    40
e    50
dtype: int64


# EXTRACTING COMPONENTS

In [3]:
# The index holds the labels ('a'..'e') that were assigned when the Series was built.
my_series.index

Index(['a', 'b', 'c', 'd', 'e'], dtype='object')

In [4]:
# The underlying data as a NumPy array (the labels are not included).
my_series.values

array([10, 20, 30, 40, 50], dtype=int64)

# DICTIONARY TO SERIES

In [5]:
# City -> number of inhabitants. When a dict is passed to pd.Series,
# its keys become the index labels and its values become the data.
population_dict = {
    'Amsterdam': 821752,
    'Rotterdam': 623652,
    'The Hague': 514861,
    'Utrecht': 345043,
    'Eindhoven': 223027,
}
population = pd.Series(data=population_dict)
print(population)

Amsterdam    821752
Rotterdam    623652
The Hague    514861
Utrecht      345043
Eindhoven    223027
dtype: int64


In [6]:
# The dictionary keys became the index labels.
population.index

Index(['Amsterdam', 'Rotterdam', 'The Hague', 'Utrecht', 'Eindhoven'], dtype='object')

In [7]:
# The dictionary values became the data, returned here as a NumPy array.
population.values

array([821752, 623652, 514861, 345043, 223027], dtype=int64)

# NAME ATTRIBUTE


In [8]:
# Display the Series before any names are assigned: the footer shows only the dtype.
population

Amsterdam    821752
Rotterdam    623652
The Hague    514861
Utrecht      345043
Eindhoven    223027
dtype: int64

In [9]:
# Name the index and the Series itself. The index name is printed above
# the labels and the Series name appears in the footer of the output.
population.index.name = 'city'
population.name = 'Population'
population

city
Amsterdam    821752
Rotterdam    623652
The Hague    514861
Utrecht      345043
Eindhoven    223027
Name: Population, dtype: int64

# ACCESSING ELEMENTS

In [10]:
# Look up single values by their index label (.loc is the explicit label-based indexer).
print(population.loc['Amsterdam'])
print(population.loc['Rotterdam'])

821752
623652


In [11]:
# Look up single values by integer position. .iloc always selects by position,
# whatever the index labels are. Plain population[0] on a string-labelled
# Series only works through a fallback from labels to positions, which newer
# pandas versions deprecate.
print(population.iloc[0])
print(population.iloc[1])

821752
623652


# SLICING A SERIES

In [12]:
# Show the full Series again as a reference for the slices below.
population

city
Amsterdam    821752
Rotterdam    623652
The Hague    514861
Utrecht      345043
Eindhoven    223027
Name: Population, dtype: int64

In [13]:
# Label-based slice: both end points are included, so 'Utrecht' appears in the result.
print(population.loc['Amsterdam':'Utrecht'])

city
Amsterdam    821752
Rotterdam    623652
The Hague    514861
Utrecht      345043
Name: Population, dtype: int64


In [14]:
# Position-based slice: .iloc makes it explicit that 0:2 are integer positions,
# not labels. The end position is excluded, so only the first two rows are
# returned (unlike a label-based slice, which includes its end label).
print(population.iloc[0:2])

city
Amsterdam    821752
Rotterdam    623652
Name: Population, dtype: int64


# ARITHMETIC OPERATIONS

In [15]:
# Element-wise division by a scalar: every value is divided by 1000
# and the result is a float Series with the same labels.
population.div(1000)

city
Amsterdam    821.752
Rotterdam    623.652
The Hague    514.861
Utrecht      345.043
Eindhoven    223.027
Name: Population, dtype: float64

In [16]:
# Two Series whose labels only partly overlap:
# 'c' exists only in series1 and 'f' exists only in series2.
series1 = pd.Series(data=[1, 2, 3, 4, 5], index=list('abcde'))
series2 = pd.Series(data=[6, 7, 8, 9, 10], index=list('abdef'))
# Addition is aligned on labels, not positions; a label that is missing
# from either operand produces NaN in the result.
print(series1.add(series2))

a     7.0
b     9.0
c     NaN
d    12.0
e    14.0
f     NaN
dtype: float64


# BOOLEAN OPERATIONS

In [17]:
# Element-wise comparison: yields a Boolean Series with one True/False per city.
print(population.gt(500000))

city
Amsterdam     True
Rotterdam     True
The Hague     True
Utrecht      False
Eindhoven    False
Name: Population, dtype: bool


# FILTERING A SERIES

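A Series can be filtered by indexing it with a Boolean Series (a mask): only the elements where the mask is `True` are kept.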

In [18]:
# Use the Boolean Series as a mask: only the rows where it is True are kept.
print(population.loc[population.gt(500000)])


city
Amsterdam    821752
Rotterdam    623652
The Hague    514861
Name: Population, dtype: int64


# SERIES METHODS


In [19]:
# Summary statistics of the values: count, mean, std, min, quartiles and max.
population.describe()

count         5.000000
mean     505667.000000
std      234307.573534
min      223027.000000
25%      345043.000000
50%      514861.000000
75%      623652.000000
max      821752.000000
Name: Population, dtype: float64

In [20]:
# University names with deliberate repeats, so the cells below have
# duplicates to work with.
universities = pd.Series(
    data=[
        'Utrecht University',
        'Leiden University',
        'Utrecht University',
        'Radboud University Nijmegen',
        'Leiden University',
        'University of Groningen',
        'Tilburg University',
    ]
)

In [21]:
# Display the Series; no index was given, so the labels are the integers 0-6.
universities

0             Utrecht University
1              Leiden University
2             Utrecht University
3    Radboud University Nijmegen
4              Leiden University
5        University of Groningen
6             Tilburg University
dtype: object

In [22]:
# Distinct values as a NumPy array; the output lists them in order of first appearance.
universities.unique()

array(['Utrecht University', 'Leiden University',
       'Radboud University Nijmegen', 'University of Groningen',
       'Tilburg University'], dtype=object)

In [23]:
# Number of distinct values.
universities.nunique()

5

# VALUE COUNTS

In [24]:
import numpy as np

In [25]:
# Convert the Series to a NumPy array; the index labels are dropped.
array_universities = np.array(universities)

In [26]:
# Total number of elements in the array.
array_universities.size

7

In [27]:
# Shape tuple; a single entry means the array is one-dimensional.
array_universities.shape

(7,)

In [28]:
# Count how often each value occurs; the output lists the most frequent values first.
print(universities.value_counts())

Utrecht University             2
Leiden University              2
Radboud University Nijmegen    1
University of Groningen        1
Tilburg University             1
dtype: int64


# ASCENDING SORT & DESCENDING SORT


In [29]:
# One value per city, deliberately not in sorted order so the sorting
# cells below have something to do. Utrecht and Eindhoven share a value.
area = pd.Series(
    {
        'Rotterdam': 41.4,
        'Utrecht': 30.5,
        'Amsterdam': 219.3,
        'Eindhoven': 30.5,
        'The Hague': 31.4,
    }
)

In [30]:
# Display the values sorted in ascending order (the default when no argument is given).
area.sort_values()

Utrecht       30.5
Eindhoven     30.5
The Hague     31.4
Rotterdam     41.4
Amsterdam    219.3
dtype: float64

In [31]:
# ascending=False reverses the order: largest value first.
area.sort_values(ascending=False)

Amsterdam    219.3
Rotterdam     41.4
The Hague     31.4
Utrecht       30.5
Eindhoven     30.5
dtype: float64

# INPLACE ARGUMENT


In [32]:
# inplace=True sorts `area` itself instead of producing a new Series,
# which is why this cell displays no output.
area.sort_values(ascending=False, inplace=True)


In [33]:
# `area` itself is now stored in descending order.
area

Amsterdam    219.3
Rotterdam     41.4
The Hague     31.4
Utrecht       30.5
Eindhoven     30.5
dtype: float64

# SORTING BY INDEX

In [34]:
# Display the Series ordered by index label (alphabetically here) rather than by value.
area.sort_index()

Amsterdam    219.3
Eindhoven     30.5
Rotterdam     41.4
The Hague     31.4
Utrecht       30.5
dtype: float64

# NAN VALUES

In [35]:
# Flag missing values element-wise: True where a value is NaN, False otherwise.
# isna() is the same method as isnull() under its other name.
print(area.isna())


Amsterdam    False
Rotterdam    False
The Hague    False
Utrecht      False
Eindhoven    False
dtype: bool


In [36]:
# Summing the Boolean flags counts the missing values (True counts as 1).
print(area.isna().sum())


0


# AGGREGATION


In [37]:
# Show the data again as a reference for the aggregations below.
area

Amsterdam    219.3
Rotterdam     41.4
The Hague     31.4
Utrecht       30.5
Eindhoven     30.5
dtype: float64

In [38]:
# Sum of all values; the trailing digits in the output are floating-point round-off.
area.sum()

353.09999999999997

In [39]:
# Arithmetic mean of the values.
area.mean()

70.61999999999999

In [40]:
# Sample standard deviation of the values (pandas divides by n - 1 by default).
area.std()

83.24209872414318

# DATAFRAME