# Series:

In [1]:
import pandas as pd

In [2]:
# Build a Series from a plain list; with no index given, pandas labels the
# values with a default integer range (0..3).
data = pd.Series([0.25, 0.5, 0.75, 1.0])

In [3]:
# Display the Series: index labels on the left, values on the right, dtype below.
data

0    0.25
1    0.50
2    0.75
3    1.00
dtype: float64

In [4]:
# keys() returns the index of the Series (here the default RangeIndex).
data.keys()

RangeIndex(start=0, stop=4, step=1)

In [5]:
# Return the Series without missing values.
# Series.valid() was deprecated in pandas 0.23 and removed in 1.0; dropna() is
# the supported equivalent. `data` holds no NaN, so every entry is kept.
data.dropna()

0    0.25
1    0.50
2    0.75
3    1.00
dtype: float64

In [6]:
# The index attribute holds the row labels; for a list-built Series it is a RangeIndex.
data.index

RangeIndex(start=0, stop=4, step=1)

In [7]:
# Same values as before, but with explicit string labels instead of 0..3.
data = pd.Series([0.25, 0.5, 0.75, 1.0], index=["a", "b", "c", "d"])

In [8]:
# Display the re-labelled Series; the left column now shows 'a'..'d'.
data

a    0.25
b    0.50
c    0.75
d    1.00
dtype: float64

In [10]:
# With explicit string labels the index is a generic Index of dtype object.
data.index

Index(['a', 'b', 'c', 'd'], dtype='object')

In [11]:
# Look up a single value by its index label, dictionary-style.
data['a']

0.25

In [12]:
import numpy as np

In [14]:
# NumPy functions accept a Series directly; the result is again a Series and
# keeps the original index labels.
np.round(data,decimals=1)

a    0.2
b    0.5
c    0.8
d    1.0
dtype: float64

In [15]:
# State name -> population; used below to build a Series from a dict.
population_dict = {
    'California': 38332521,
    'Texas': 26448193,
    'New York': 19651127,
    'Florida': 19552860,
    'Illinois': 12882135,
}

In [18]:
# Build a Series from the dict (keys become the index) and sort it by label.
# Since pandas 0.23 (on Python 3.6+) a dict-built Series keeps the dict's
# insertion order instead of sorting the keys. Sorting explicitly reproduces the
# alphabetical order shown in the outputs and makes label slices such as
# s['Illinois':'Texas'] well defined.
s = pd.Series(population_dict).sort_index()

In [19]:
# Display the population Series (state names as index labels).
s

California    38332521
Florida       19552860
Illinois      12882135
New York      19651127
Texas         26448193
dtype: int64

In [20]:
# Label lookup works like a dict access and returns the scalar value.
s['Texas']

26448193

In [21]:
# Label-based slice: unlike a positional slice, both end labels are included.
# Label slicing only gives a meaningful range on a sorted index. On an unsorted
# index (insertion order in pandas >= 0.23) this slice comes back empty, so sort
# first and use the explicit label indexer.
s.sort_index().loc['Illinois':'Texas']

Illinois    12882135
New York    19651127
Texas       26448193
dtype: int64

In [22]:
# Plain dict with integer keys. Note that this rebinds `data`, which previously
# referred to a Series.
data = {2: 'a', 3: 'b', 4: 'c'}

In [23]:
# An explicit index selects which dict keys end up in the Series;
# key 2 is not listed, so it is left out.
pd.Series(data,index=[3,4])

3    b
4    c
dtype: object

In [24]:
# A scalar is repeated for every label of the given index.
pd.Series(5,index=[100,200,300])

100    5
200    5
300    5
dtype: int64

In [27]:
# apply() runs a function over the values of the Series and returns a new Series
# with the same index; here the integer input yields float64 results.
s.apply(np.sin)  # applying a function to a Series

California    0.947749
Florida       0.903259
Illinois      0.231446
New York     -0.003917
Texas        -0.815607
dtype: float64

In [29]:
# apply() also takes a custom function: values greater than 50 are kept,
# anything else gets 10 added. Every population here exceeds 50, so the
# result equals the original Series.
s.apply(lambda x: x if x > 50 else x+10 )  # applying a custom function to a Series

California    38332521
Florida       19552860
Illinois      12882135
New York      19651127
Texas         26448193
dtype: int64

In [30]:
# City name -> population, turned into a Series indexed by city name.
cities = {
    "London": 8615246,
    "Berlin": 3562166,
    "Madrid": 3165235,
    "Rome": 2874038,
    "Paris": 2273305,
    "Vienna": 1805681,
    "Bucharest": 1803425,
    "Hamburg": 1760433,
    "Budapest": 1754000,
    "Warsaw": 1740119,
    "Barcelona": 1602386,
    "Munich": 1493900,
    "Milan": 1350680,
}
city_series = pd.Series(cities)
print(city_series)

Barcelona    1602386
Berlin       3562166
Bucharest    1803425
Budapest     1754000
Hamburg      1760433
London       8615246
Madrid       3165235
Milan        1350680
Munich       1493900
Paris        2273305
Rome         2874038
Vienna       1805681
Warsaw       1740119
dtype: int64


In [31]:
# Build a Series for a chosen list of labels. Labels that are not keys of
# `cities` ("Zurich", "Stuttgart") get NaN, and the dtype becomes float64.
my_cities = ["London", "Paris", "Zurich", "Berlin", "Stuttgart", "Hamburg"]
my_city_series = pd.Series(cities, index=my_cities)
my_city_series

London       8615246.0
Paris        2273305.0
Zurich             NaN
Berlin       3562166.0
Stuttgart          NaN
Hamburg      1760433.0
dtype: float64

In [32]:
# Same construction, but every requested label exists in `cities`,
# so there are no NaN values and the dtype stays int64.
my_cities = ["London", "Paris", "Berlin", "Hamburg"]
my_city_series = pd.Series(cities, index=my_cities)
my_city_series

London     8615246
Paris      2273305
Berlin     3562166
Hamburg    1760433
dtype: int64

In [33]:
# Rebuild the Series with the two unknown cities so that it contains NaN
# entries for the missing-data examples that follow.
my_cities = ["London", "Paris", "Zurich", "Berlin", "Stuttgart", "Hamburg"]
my_city_series = pd.Series(cities, index=my_cities)

In [34]:
# isnull() yields a boolean Series: True where the value is missing (NaN).
print(my_city_series.isnull())

London       False
Paris        False
Zurich        True
Berlin       False
Stuttgart     True
Hamburg      False
dtype: bool


In [35]:
# notnull() is the complement of isnull(): True where a value is present.
print(my_city_series.notnull())

London        True
Paris         True
Zurich       False
Berlin        True
Stuttgart    False
Hamburg       True
dtype: bool


In [36]:
# dropna() returns the Series without the entries whose value is NaN.
print(my_city_series.dropna())  # drop null values

London     8615246.0
Paris      2273305.0
Berlin     3562166.0
Hamburg    1760433.0
dtype: float64


In [37]:
# fillna() with a scalar replaces every NaN by that constant.
my_city_series.fillna(0)  # fill null values with a constant

London       8615246.0
Paris        2273305.0
Zurich             0.0
Berlin       3562166.0
Stuttgart          0.0
Hamburg      1760433.0
dtype: float64

In [39]:
# fillna() also accepts a dict keyed by index label, so each missing entry
# receives its own replacement value.
missing_cities = {"Stuttgart": 597939, "Zurich": 378884}
my_city_series.fillna(missing_cities)


London       8615246.0
Paris        2273305.0
Zurich        378884.0
Berlin       3562166.0
Stuttgart     597939.0
Hamburg      1760433.0
dtype: float64

In [40]:
# Show the state population Series again before filtering it.
s

California    38332521
Florida       19552860
Illinois      12882135
New York      19651127
Texas         26448193
dtype: int64

In [44]:
# Boolean-mask filtering: the comparison produces a True/False Series, and
# indexing with it keeps only the entries where the mask is True.
s[s>30000000]  # filtering a Series

California    38332521
dtype: int64

# DataFrame:

In [45]:
# Column-oriented data: each key becomes a column, each list holds that column's
# values in row order. Note that this rebinds `cities`, which previously mapped
# city name -> population.
cities = {
    "name": [
        "London", "Berlin", "Madrid", "Rome", "Paris", "Vienna", "Bucharest",
        "Hamburg", "Budapest", "Warsaw", "Barcelona", "Munich", "Milan",
    ],
    "population": [
        8615246, 3562166, 3165235, 2874038, 2273305, 1805681, 1803425,
        1760433, 1754000, 1740119, 1602386, 1493900, 1350680,
    ],
    "country": [
        "England", "Germany", "Spain", "Italy", "France", "Austria", "Romania",
        "Germany", "Hungary", "Poland", "Spain", "Germany", "Italy",
    ],
}
city_frame = pd.DataFrame(cities)
city_frame

Unnamed: 0,country,name,population
0,England,London,8615246
1,Germany,Berlin,3562166
2,Spain,Madrid,3165235
3,Italy,Rome,2874038
4,France,Paris,2273305
5,Austria,Vienna,1805681
6,Romania,Bucharest,1803425
7,Germany,Hamburg,1760433
8,Hungary,Budapest,1754000
9,Poland,Warsaw,1740119


In [46]:
# Use spelled-out ordinals as row labels instead of the default integer index.
# NOTE(review): "eigth" and "twelvth" are misspellings of "eighth" / "twelfth";
# they are kept unchanged here because they are the actual index labels.
ordinals = [
    "first", "second", "third", "fourth", "fifth", "sixth", "seventh",
    "eigth", "ninth", "tenth", "eleventh", "twelvth", "thirteenth",
]
city_frame = pd.DataFrame(cities, index=ordinals)
city_frame

Unnamed: 0,country,name,population
first,England,London,8615246
second,Germany,Berlin,3562166
third,Spain,Madrid,3165235
fourth,Italy,Rome,2874038
fifth,France,Paris,2273305
sixth,Austria,Vienna,1805681
seventh,Romania,Bucharest,1803425
eigth,Germany,Hamburg,1760433
ninth,Hungary,Budapest,1754000
tenth,Poland,Warsaw,1740119


In [47]:
# `columns` fixes the column order explicitly (name, country, population).
city_frame = pd.DataFrame(
    cities,
    index=ordinals,
    columns=["name", "country", "population"],
)
city_frame

Unnamed: 0,name,country,population
first,London,England,8615246
second,Berlin,Germany,3562166
third,Madrid,Spain,3165235
fourth,Rome,Italy,2874038
fifth,Paris,France,2273305
sixth,Vienna,Austria,1805681
seventh,Bucharest,Romania,1803425
eigth,Hamburg,Germany,1760433
ninth,Budapest,Hungary,1754000
tenth,Warsaw,Poland,1740119


In [48]:
# Use the country values as row labels and keep only the remaining two columns.
city_frame = pd.DataFrame(
    cities,
    index=cities["country"],
    columns=["name", "population"],
)
city_frame

Unnamed: 0,name,population
England,London,8615246
Germany,Berlin,3562166
Spain,Madrid,3165235
Italy,Rome,2874038
France,Paris,2273305
Austria,Vienna,1805681
Romania,Bucharest,1803425
Germany,Hamburg,1760433
Hungary,Budapest,1754000
Poland,Warsaw,1740119


In [49]:
# Start again from the raw dict (no custom index, all three columns).
city_frame = pd.DataFrame(cities)

In [50]:
# set_index() moves an existing column into the index; the result is bound
# to a new name here.
city_frame2 = city_frame.set_index("country")
print(city_frame2)

              name  population
country                       
England     London     8615246
Germany     Berlin     3562166
Spain       Madrid     3165235
Italy         Rome     2874038
France       Paris     2273305
Austria     Vienna     1805681
Romania  Bucharest     1803425
Germany    Hamburg     1760433
Hungary   Budapest     1754000
Poland      Warsaw     1740119
Spain    Barcelona     1602386
Germany     Munich     1493900
Italy        Milan     1350680


In [51]:
# Same re-indexing, but with inplace=True: city_frame itself is modified and
# nothing is reassigned.
city_frame = pd.DataFrame(cities)
city_frame.set_index("country", inplace=True)
print(city_frame)

              name  population
country                       
England     London     8615246
Germany     Berlin     3562166
Spain       Madrid     3165235
Italy         Rome     2874038
France       Paris     2273305
Austria     Vienna     1805681
Romania  Bucharest     1803425
Germany    Hamburg     1760433
Hungary   Budapest     1754000
Poland      Warsaw     1740119
Spain    Barcelona     1602386
Germany     Munich     1493900
Italy        Milan     1350680


In [52]:
# sum() works column by column: the population column is added up, while the
# string column `name` is concatenated.
print(city_frame.sum())

name          LondonBerlinMadridRomeParisViennaBucharestHamb...
population                                             33800614
dtype: object


In [53]:
# Sum a single column to get one scalar total.
city_frame["population"].sum()

33800614

In [54]:
# cumsum() gives the running total down the column; the index (country) is kept.
x = city_frame["population"].cumsum()
print(x)

country
England     8615246
Germany    12177412
Spain      15342647
Italy      18216685
France     20489990
Austria    22295671
Romania    24099096
Germany    25859529
Hungary    27613529
Poland     29353648
Spain      30956034
Germany    32449934
Italy      33800614
Name: population, dtype: int64


In [55]:
# Assigning a Series to an existing column replaces its values: `population`
# now holds the running totals rather than the per-city figures.
city_frame["population"] = x
print(city_frame)

              name  population
country                       
England     London     8615246
Germany     Berlin    12177412
Spain       Madrid    15342647
Italy         Rome    18216685
France       Paris    20489990
Austria     Vienna    22295671
Romania  Bucharest    24099096
Germany    Hamburg    25859529
Hungary   Budapest    27613529
Poland      Warsaw    29353648
Spain    Barcelona    30956034
Germany     Munich    32449934
Italy        Milan    33800614


In [56]:
# Rebuild the frame indexed by city name. "cum_population" is not a key of
# `cities`, so that column is created without values.
city_frame = pd.DataFrame(
    cities,
    index=cities["name"],
    columns=["country", "population", "cum_population"],
)
city_frame

Unnamed: 0,country,population,cum_population
London,England,8615246,
Berlin,Germany,3562166,
Madrid,Spain,3165235,
Rome,Italy,2874038,
Paris,France,2273305,
Vienna,Austria,1805681,
Bucharest,Romania,1803425,
Hamburg,Germany,1760433,
Budapest,Hungary,1754000,
Warsaw,Poland,1740119,


In [57]:
# Fill the cum_population column with the running total of population.
city_frame["cum_population"] = city_frame["population"].cumsum()
city_frame

Unnamed: 0,country,population,cum_population
London,England,8615246,8615246
Berlin,Germany,3562166,12177412
Madrid,Spain,3165235,15342647
Rome,Italy,2874038,18216685
Paris,France,2273305,20489990
Vienna,Austria,1805681,22295671
Bucharest,Romania,1803425,24099096
Hamburg,Germany,1760433,25859529
Budapest,Hungary,1754000,27613529
Warsaw,Poland,1740119,29353648


In [58]:
# Rebuild the frame with an extra "area" column; it has no data in `cities`
# and therefore shows up as NaN.
city_frame = pd.DataFrame(
    cities,
    index=cities["name"],
    columns=["country", "area", "population"],
)
print(city_frame)

           country area  population
London     England  NaN     8615246
Berlin     Germany  NaN     3562166
Madrid       Spain  NaN     3165235
Rome         Italy  NaN     2874038
Paris       France  NaN     2273305
Vienna     Austria  NaN     1805681
Bucharest  Romania  NaN     1803425
Hamburg    Germany  NaN     1760433
Budapest   Hungary  NaN     1754000
Warsaw      Poland  NaN     1740119
Barcelona    Spain  NaN     1602386
Munich     Germany  NaN     1493900
Milan        Italy  NaN     1350680


In [59]:
# Selecting one column with [] returns it as a Series named after the column.
print(city_frame["population"])

London       8615246
Berlin       3562166
Madrid       3165235
Rome         2874038
Paris        2273305
Vienna       1805681
Bucharest    1803425
Hamburg      1760433
Budapest     1754000
Warsaw       1740119
Barcelona    1602386
Munich       1493900
Milan        1350680
Name: population, dtype: int64


In [60]:
# Assigning a scalar to a column sets that value in every row.
city_frame["area"] = 1572
print(city_frame)

           country  area  population
London     England  1572     8615246
Berlin     Germany  1572     3562166
Madrid       Spain  1572     3165235
Rome         Italy  1572     2874038
Paris       France  1572     2273305
Vienna     Austria  1572     1805681
Bucharest  Romania  1572     1803425
Hamburg    Germany  1572     1760433
Budapest   Hungary  1572     1754000
Warsaw      Poland  1572     1740119
Barcelona    Spain  1572     1602386
Munich     Germany  1572     1493900
Milan        Italy  1572     1350680


In [61]:
# Assigning a list sets the column row by row, in row order
# (one area value per city).
area = [
    1572, 891.85, 605.77, 1285, 105.4, 414.6, 228,
    755, 525.2, 517, 101.9, 310.4, 181.8,
]
city_frame["area"] = area
print(city_frame)

           country     area  population
London     England  1572.00     8615246
Berlin     Germany   891.85     3562166
Madrid       Spain   605.77     3165235
Rome         Italy  1285.00     2874038
Paris       France   105.40     2273305
Vienna     Austria   414.60     1805681
Bucharest  Romania   228.00     1803425
Hamburg    Germany   755.00     1760433
Budapest   Hungary   525.20     1754000
Warsaw      Poland   517.00     1740119
Barcelona    Spain   101.90     1602386
Munich     Germany   310.40     1493900
Milan        Italy   181.80     1350680


In [62]:
# Select a single row by its index label; the row comes back as a Series whose
# index is the column names.
# The `.ix` indexer was deprecated in pandas 0.20 and removed in 1.0; `.loc` is
# the label-based replacement.
city_frame.loc["Hamburg"]

country       Germany
area              755
population    1760433
Name: Hamburg, dtype: object