   # Learning Python

## Pandas

In [3]:
# Record the version of the Python interpreter running this notebook.
from platform import python_version
print(python_version())

3.8.5


In [4]:
import pandas as pd
pd.__version__

'1.1.3'

In [5]:
from pandas import Series

## Series

A `Series` is a one-dimensional ndarray with axis labels (including time series).

In [6]:
# Build a Series from a plain list; with no `index` argument pandas assigns
# a default integer index starting at 0.
obj1 = Series([67, 78, -56, 13])

In [7]:
# Display the Series as the cell's last expression, matching the other
# display cells in this notebook; the rendered text is the same as print().
obj1

0    67
1    78
2   -56
3    13
dtype: int64


In [8]:
type(obj1)

pandas.core.series.Series

In [9]:
# The data of the Series, without its labels, as a NumPy array.
obj1.values

array([ 67,  78, -56,  13])

In [10]:
# The axis labels; for obj1 this is the default RangeIndex 0..3.
obj1.index

RangeIndex(start=0, stop=4, step=1)

In [11]:
# Same four values as obj1, but labelled 'a'..'d' instead of using the
# default integer index.
obj2 = Series(
    [67, 78, -56, 13],
    index=list('abcd'),
)

In [12]:
obj2

a    67
b    78
c   -56
d    13
dtype: int64

In [13]:
obj2.values

array([ 67,  78, -56,  13])

In [14]:
obj2.index

Index(['a', 'b', 'c', 'd'], dtype='object')

In [15]:
# Boolean-mask selection: keep only the entries whose value is positive.
# The surviving entries keep their original labels.
obj2[obj2 > 0]

a    67
b    78
d    13
dtype: int64

In [16]:
# `in` on a Series tests the index labels, not the stored values.
'd' in obj2

True

In [17]:
# Mapping of sport name -> integer value, used below to build
# label-indexed Series.
dict1 = {
    'Football': 5200,
    'Tenis': 120,
    'Swimming': 698,
    'Volleyball': 1550,
}

In [18]:
# Building a Series from a dict: the keys become the index labels and the
# values become the data.
obj3 = Series(dict1)

In [19]:
obj3

Football      5200
Tenis          120
Swimming       698
Volleyball    1550
dtype: int64

In [20]:
type(obj3)

pandas.core.series.Series

In [21]:
# Labels to use as an explicit index. Compared with dict1's keys this list
# omits 'Volleyball' and adds a label that dict1 does not contain.
# NOTE(review): 'Basktetball' looks like a typo for 'Basketball'; either
# spelling is absent from dict1, so the demonstration below is unaffected.
list1 = ['Football', 'Tenis', 'Swimming', 'Basktetball']

In [22]:
# Build from dict1 but keep only the labels in list1, in that order.
# 'Volleyball' (not in list1) is dropped; 'Basktetball' (not in dict1) gets
# NaN, and the dtype becomes float64.
obj4 = Series(dict1, index=list1)

In [23]:
obj4

Football       5200.0
Tenis           120.0
Swimming        698.0
Basktetball       NaN
dtype: float64

In [24]:
# Element-wise missing-value mask: True where the value is NaN.
pd.isnull(obj4)

Football       False
Tenis          False
Swimming       False
Basktetball     True
dtype: bool

In [25]:
# Complement of pd.isnull: True where a value is present.
pd.notnull(obj4)

Football        True
Tenis           True
Swimming        True
Basktetball    False
dtype: bool

In [26]:
# Method form of pd.isnull(obj4); it produces the same mask.
obj4.isnull()

Football       False
Tenis          False
Swimming       False
Basktetball     True
dtype: bool

In [27]:
obj3

Football      5200
Tenis          120
Swimming       698
Volleyball    1550
dtype: int64

In [28]:
obj4

Football       5200.0
Tenis           120.0
Swimming        698.0
Basktetball       NaN
dtype: float64

In [29]:
# Arithmetic between Series aligns on index labels rather than position.
# Labels with a value in both operands are summed; 'Volleyball' (absent
# from obj4) and 'Basktetball' (absent from obj3, NaN in obj4) come out NaN.
obj5 = obj3 + obj4

In [30]:
obj5

Basktetball        NaN
Football       10400.0
Swimming        1396.0
Tenis            240.0
Volleyball         NaN
dtype: float64

In [31]:
# Name the Series itself; the name appears in the repr footer.
obj5.name = 'population'

In [32]:
# Name the index; the name appears as a header line above the labels.
obj5.index.name = 'sports'

In [33]:
obj5

sports
Basktetball        NaN
Football       10400.0
Swimming        1396.0
Tenis            240.0
Volleyball         NaN
Name: population, dtype: float64

## DataFrames

A `DataFrame` holds two-dimensional, size-mutable, potentially heterogeneous tabular data.

The data structure also contains labeled axes (rows and columns). Arithmetic operations align on both row and column labels. It can be thought of as a dict-like container for Series objects, and it is the primary pandas data structure.

In [34]:
from pandas import DataFrame

In [35]:
# Column-oriented input for a DataFrame: each key becomes a column and each
# list holds that column's values, one per row.
data = {
    'state': ['Santa Catarina', 'Parana', 'Goias', 'Bahia', 'Minas Gerais'],
    'year': list(range(2002, 2007)),
    'population': [1.5, 1.7, 3.6, 2.4, 2.9],
}

In [36]:
# Build a DataFrame from the dict of equal-length lists; the dict keys
# become the column names and rows get a default integer index.
frame = DataFrame(data)

In [37]:
frame

Unnamed: 0,state,year,population
0,Santa Catarina,2002,1.5
1,Parana,2003,1.7
2,Goias,2004,3.6
3,Bahia,2005,2.4
4,Minas Gerais,2006,2.9


In [38]:
type(frame)

pandas.core.frame.DataFrame

In [39]:
# Same data, with an explicit column order and string row labels.
# 'area' is not a key of `data`, so that column is created empty (all NaN).
frame2 = DataFrame(
    data,
    columns=['year', 'state', 'population', 'area'],
    index=['one', 'two', 'three', 'four', 'five'],
)

In [40]:
type(frame2)

pandas.core.frame.DataFrame

In [41]:
frame2

Unnamed: 0,year,state,population,area
one,2002,Santa Catarina,1.5,
two,2003,Parana,1.7,
three,2004,Goias,3.6,
four,2005,Bahia,2.4,
five,2006,Minas Gerais,2.9,


In [42]:
# Selecting one column by key returns a Series that keeps the row labels
# and carries the column name as its `Name`.
frame2['state']

one      Santa Catarina
two              Parana
three             Goias
four              Bahia
five       Minas Gerais
Name: state, dtype: object

In [43]:
frame2.index

Index(['one', 'two', 'three', 'four', 'five'], dtype='object')

In [44]:
frame2.columns

Index(['year', 'state', 'population', 'area'], dtype='object')

In [45]:
# The cell contents as a 2-D NumPy array; because the columns hold mixed
# types (ints, strings, floats) the array dtype is `object`.
frame2.values

array([[2002, 'Santa Catarina', 1.5, nan],
       [2003, 'Parana', 1.7, nan],
       [2004, 'Goias', 3.6, nan],
       [2005, 'Bahia', 2.4, nan],
       [2006, 'Minas Gerais', 2.9, nan]], dtype=object)

In [46]:
# Attribute-style column access; returns the same Series as frame2['year'].
frame2.year

one      2002
two      2003
three    2004
four     2005
five     2006
Name: year, dtype: int64

In [47]:
frame2['year']

one      2002
two      2003
three    2004
four     2005
five     2006
Name: year, dtype: int64

In [28]:
# A positional slice inside [] selects rows (here the first two), not columns.
frame2[:2]

Unnamed: 0,year,state,population,area
one,2002,Santa Catarina,1.5,
two,2003,Parana,1.7,


In [31]:
frame[:2]

Unnamed: 0,state,year,population
0,Santa Catarina,2002,1.5
1,Parana,2003,1.7


## Using NumPy with Pandas

In [48]:
import numpy as np

In [50]:
# Fill the previously empty 'area' column with one float per row (0.0, 1.0, ...).
# The range is sized from the frame itself rather than a hard-coded 5, so the
# assignment stays valid if rows are added to or removed from `frame2`.
frame2['area'] = np.arange(len(frame2), dtype=float)

In [51]:
frame2

Unnamed: 0,year,state,population,area
one,2002,Santa Catarina,1.5,0.0
two,2003,Parana,1.7,1.0
three,2004,Goias,3.6,2.0
four,2005,Bahia,2.4,3.0
five,2006,Minas Gerais,2.9,4.0


In [52]:
frame2.values

array([[2002, 'Santa Catarina', 1.5, 0.0],
       [2003, 'Parana', 1.7, 1.0],
       [2004, 'Goias', 3.6, 2.0],
       [2005, 'Bahia', 2.4, 3.0],
       [2006, 'Minas Gerais', 2.9, 4.0]], dtype=object)

In [56]:
# Summary statistics per column; only the numeric columns are included,
# so the string column 'state' is left out of the result.
frame2.describe()

Unnamed: 0,year,population,area
count,5.0,5.0,5.0
mean,2004.0,2.42,2.0
std,1.581139,0.864292,1.581139
min,2002.0,1.5,0.0
25%,2003.0,1.7,1.0
50%,2004.0,2.4,2.0
75%,2005.0,2.9,3.0
max,2006.0,3.6,4.0


In [58]:
# Slicing rows by label: unlike positional slices, the end label ('four')
# is included in the result.
frame2['two':'four']

Unnamed: 0,year,state,population,area
two,2003,Parana,1.7,1.0
three,2004,Goias,3.6,2.0
four,2005,Bahia,2.4,3.0


In [61]:
# Element-wise comparison on a column yields a boolean Series with the
# same row labels.
frame2.area < 3

one       True
two       True
three     True
four     False
five     False
Name: area, dtype: bool

In [62]:
# .loc selects by row label; a single row comes back as a Series whose
# index is the column names.
frame2.loc['five']

year                  2006
state         Minas Gerais
population             2.9
area                     4
Name: five, dtype: object

In [64]:
# .iloc selects by integer position; position 4 is the row labelled 'five',
# so this returns the same row as frame2.loc['five'].
frame2.iloc[4]

year                  2006
state         Minas Gerais
population             2.9
area                     4
Name: five, dtype: object

In [1]:
# Column data for a small seven-row table: a day number plus two integer
# measurements per day.
web_status = {
    'days': list(range(1, 8)),
    'visitors': [45, 23, 67, 78, 23, 12, 14],
    'rate': [11 * day for day in range(1, 8)],
}

In [3]:
import pandas as pd

In [4]:
# Build a DataFrame from the dict of lists; rows get a default integer index.
df = pd.DataFrame(web_status)

In [5]:
df

Unnamed: 0,days,visitors,rate
0,1,45,11
1,2,23,22
2,3,67,33
3,4,78,44
4,5,23,55
5,6,12,66
6,7,14,77


In [6]:
# set_index returns a new DataFrame that uses 'days' as the row index.
# The result is only displayed here, not assigned, so `df` itself is
# unchanged (the df.head() output below still shows 'days' as a column).
df.set_index('days')

Unnamed: 0_level_0,visitors,rate
days,Unnamed: 1_level_1,Unnamed: 2_level_1
1,45,11
2,23,22
3,67,33
4,78,44
5,23,55
6,12,66
7,14,77


In [7]:
# Preview the first rows; with no argument head() shows five of the seven.
df.head()

Unnamed: 0,days,visitors,rate
0,1,45,11
1,2,23,22
2,3,67,33
3,4,78,44
4,5,23,55


In [8]:
df['visitors']

0    45
1    23
2    67
3    78
4    23
5    12
6    14
Name: visitors, dtype: int64