In [4]:
import pandas as pd
import numpy as np

In [5]:
df = pd.DataFrame([[1.4, np.nan],
                   [7.1, -4.5],
                   [np.nan, np.nan],
                   [0.75, -1.3]],
                  index=["a", "b", "c", "d"],
                  columns=["one", "two"])

In [6]:
df

Unnamed: 0,one,two
a,1.4,
b,7.1,-4.5
c,,
d,0.75,-1.3


In [8]:
df.sum()

one    9.25
two   -5.80
dtype: float64

In [9]:
df.sum(axis="columns")

a    1.40
b    2.60
c    0.00
d   -0.55
dtype: float64

In [10]:
df.sum(axis="index", skipna=False)

one   NaN
two   NaN
dtype: float64

In [11]:
df.mean(axis="columns")

a    1.400
b    1.300
c      NaN
d   -0.275
dtype: float64

In [12]:
df.idxmax()

one    b
two    d
dtype: object

In [13]:
df.cumsum()

Unnamed: 0,one,two
a,1.4,
b,8.5,-4.5
c,,
d,9.25,-5.8


In [14]:
df.describe()

Unnamed: 0,one,two
count,3.0,2.0
mean,3.083333,-2.9
std,3.493685,2.262742
min,0.75,-4.5
25%,1.075,-3.7
50%,1.4,-2.9
75%,4.25,-2.1
max,7.1,-1.3


In [15]:
obj = pd.Series(["a", "a", "b", "c"] * 4)

In [16]:
obj.describe()

count     16
unique     3
top        a
freq       8
dtype: object

In [20]:
price = pd.read_pickle("yahoo_price.pkl")
volume = pd.read_pickle("yahoo_volume.pkl")

In [23]:
result = price.pct_change()
result.tail()

Unnamed: 0_level_0,AAPL,GOOG,IBM,MSFT
Date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
2016-10-17,-0.00068,0.001837,0.002072,-0.003483
2016-10-18,-0.000681,0.019616,-0.026168,0.00769
2016-10-19,-0.002979,0.007846,0.003583,-0.002255
2016-10-20,-0.000512,-0.005652,0.001719,-0.004867
2016-10-21,-0.00393,0.003011,-0.012474,0.042096


In [25]:
result["MSFT"].corr(result["IBM"])

0.4997636114415114

In [26]:
result["MSFT"].cov(result["IBM"])

8.870655479703546e-05

In [27]:
result.corr()

Unnamed: 0,AAPL,GOOG,IBM,MSFT
AAPL,1.0,0.407919,0.386817,0.389695
GOOG,0.407919,1.0,0.405099,0.465919
IBM,0.386817,0.405099,1.0,0.499764
MSFT,0.389695,0.465919,0.499764,1.0


In [28]:
result.corrwith(result["IBM"])

AAPL    0.386817
GOOG    0.405099
IBM     1.000000
MSFT    0.499764
dtype: float64

In [29]:
result.corrwith(volume)

AAPL   -0.075565
GOOG   -0.007067
IBM    -0.204849
MSFT   -0.092950
dtype: float64

In [52]:
float_data = pd.Series([1.2, -3.5, np.nan, 0])
float_data

0    1.2
1   -3.5
2    NaN
3    0.0
dtype: float64

In [53]:
float_data.isna()

0    False
1    False
2     True
3    False
dtype: bool

In [55]:
string_data = pd.Series(["aabbccdd", np.nan, None, "apple"])
string_data.isna()

0    False
1     True
2     True
3    False
dtype: bool

In [56]:
data = pd.Series([1, np.nan, 3.5, np.nan, 7])
data.dropna()

0    1.0
2    3.5
4    7.0
dtype: float64

In [62]:
data = pd.DataFrame([[1., 6.5, 3.], [1., np.nan, np.nan],
                     [np.nan, np.nan, np.nan], [np.nan, 6.5, 3.]])
data

Unnamed: 0,0,1,2
0,1.0,6.5,3.0
1,1.0,,
2,,,
3,,6.5,3.0


In [63]:
data.dropna()

Unnamed: 0,0,1,2
0,1.0,6.5,3.0


Unnamed: 0,0,1,2
0,1.0,6.5,3.0


In [64]:
data.dropna(how="all")

Unnamed: 0,0,1,2
0,1.0,6.5,3.0
1,1.0,,
3,,6.5,3.0


In [65]:
data[4] = np.nan
data

Unnamed: 0,0,1,2,4
0,1.0,6.5,3.0,
1,1.0,,,
2,,,,
3,,6.5,3.0,


In [66]:
data.dropna(axis="columns", how="all")

Unnamed: 0,0,1,2
0,1.0,6.5,3.0
1,1.0,,
2,,,
3,,6.5,3.0


In [68]:
df

Unnamed: 0,one,two
a,1.4,
b,7.1,-4.5
c,,
d,0.75,-1.3


In [69]:
df.fillna(0)

Unnamed: 0,one,two
a,1.4,0.0
b,7.1,-4.5
c,0.0,0.0
d,0.75,-1.3


In [72]:
df.fillna({'one': 0.5, 'two': 0})

Unnamed: 0,one,two
a,1.4,0.0
b,7.1,-4.5
c,0.5,0.0
d,0.75,-1.3


In [73]:
data = pd.Series([1., np.nan, 3.5, np.nan, 7])
data.fillna(data.mean())

0    1.000000
1    3.833333
2    3.500000
3    3.833333
4    7.000000
dtype: float64

In [74]:
data = {'Country': ['Russia', 'USA', 'China', 'India', 'Brazil'],
        'Life Expectancy': [71.0, 79.0, 76.0, 68.5, 75.2],
        'Population': [144.5, 327.2, 1402.0, 1371.3, 211.0]}
df = pd.DataFrame(data)
df

Unnamed: 0,Country,Life Expectancy,Population
0,Russia,71.0,144.5
1,USA,79.0,327.2
2,China,76.0,1402.0
3,India,68.5,1371.3
4,Brazil,75.2,211.0


In [75]:
pivot_table = pd.pivot_table(df, values=['Life Expectancy', 'Population'], index='Country')
pivot_table

Unnamed: 0_level_0,Life Expectancy,Population
Country,Unnamed: 1_level_1,Unnamed: 2_level_1
Brazil,75.2,211.0
China,76.0,1402.0
India,68.5,1371.3
Russia,71.0,144.5
USA,79.0,327.2


In [77]:
data = {'Country': ['Russia', 'USA', 'China', 'India', 'Brazil'],
        'Year': [2000, 2000, 2000, 2010, 2010],
        'Life Expectancy': [71.0, 79.0, 76.0, 68.5, 75.2],
        'Population': [144.5, 327.2, 1402.0, 1371.3, 211.0]}
df = pd.DataFrame(data)
pivot_table = pd.pivot_table(df, values='Life Expectancy', index=['Country', 'Year'], columns='Year', aggfunc='mean')
pivot_table

Unnamed: 0_level_0,Year,2000,2010,2000,2010
Country,Year,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1
Brazil,2010,,75.2,,211.0
China,2000,76.0,,1402.0,
India,2010,,68.5,,1371.3
Russia,2000,71.0,,144.5,
USA,2000,79.0,,327.2,


In [79]:
filtered_data = df[(df['Country'] == 'Russia') & (df['Year'] == 2000)]
pivot_table = pd.pivot_table(filtered_data, values='Life Expectancy', index='Country', columns='Year', aggfunc='mean')
pivot_table

Year,2000
Country,Unnamed: 1_level_1
Russia,71.0


In [80]:
df_pivot = df.pivot(index='Country',
                   values='Population',
                   columns='Year')
df_pivot

Year,2000,2010
Country,Unnamed: 1_level_1,Unnamed: 2_level_1
Brazil,,211.0
China,1402.0,
India,,1371.3
Russia,144.5,
USA,327.2,


In [85]:
data = {'Месяц': ['Январь', 'Февраль', 'Март', 'Апрель'],
       'Продажи': [100, 200, 150, 300]}
df = pd.DataFrame(data)
data2 = {'Месяц': ['Апрель', 'Май', 'Июнь', 'Июль'],
       'Продажи': [250, 150, 200, 350]}
df2 = pd.DataFrame(data2)

In [86]:
df

Unnamed: 0,Месяц,Продажи
0,Январь,100
1,Февраль,200
2,Март,150
3,Апрель,300


In [87]:
df2

Unnamed: 0,Месяц,Продажи
0,Апрель,250
1,Май,150
2,Июнь,200
3,Июль,350


In [92]:
df3 = df.join(df2.set_index('Месяц'), on='Месяц', rsuffix='_2')
df3

Unnamed: 0,Месяц,Продажи,Продажи_2
0,Январь,100,
1,Февраль,200,
2,Март,150,
3,Апрель,300,250.0


In [95]:
df4 = pd.merge(df, df2, on='Месяц')
df4

Unnamed: 0,Месяц,Продажи_x,Продажи_y
0,Апрель,300,250


In [96]:
df1 = pd.DataFrame({'A': ['A0', 'A1', 'A2', 'A3'],
                    'B': ['B0', 'B1', 'B2', 'B3']})
df2 = pd.DataFrame({'A': ['A4', 'A5', 'A6', 'A7'],
                    'B': ['B4', 'B5', 'B6', 'B7']})

In [97]:
pd.concat([df1, df2], axis=0)

Unnamed: 0,A,B
0,A0,B0
1,A1,B1
2,A2,B2
3,A3,B3
0,A4,B4
1,A5,B5
2,A6,B6
3,A7,B7


In [98]:
pd.concat([df1, df2], axis=1)

Unnamed: 0,A,B,A.1,B.1
0,A0,B0,A4,B4
1,A1,B1,A5,B5
2,A2,B2,A6,B6
3,A3,B3,A7,B7


In [99]:
df3 = df1._append(df2)
df3

Unnamed: 0,A,B
0,A0,B0
1,A1,B1
2,A2,B2
3,A3,B3
0,A4,B4
1,A5,B5
2,A6,B6
3,A7,B7


In [100]:
df = pd.DataFrame(columns=['A', 'B'])
df

Unnamed: 0,A,B


In [101]:
df = df._append({'A': 1, 'B': 2}, ignore_index=True)
df

Unnamed: 0,A,B
0,1,2
