# Workshop - 10 Minutes to pandas

Work from the pandas documentation:
> [`https://pandas.pydata.org/pandas-docs/stable/getting_started/10min.html`](https://pandas.pydata.org/pandas-docs/stable/getting_started/10min.html)

You may want to read in an external dataframe after trying the examples at the end of the "10 minutes to pandas" documentation above. Try these:  
- https://raw.githubusercontent.com/datalab-datasets/file-samples/master/diamonds.csv
- https://raw.githubusercontent.com/datalab-datasets/wine-quality/master/winequality-red.csv
- https://raw.githubusercontent.com/datalab-datasets/wine-quality/master/winequality-white.csv

Find a few more details on these datasets and others here:
- https://github.com/datalab-datasets


The diamonds dataframe can be read by pandas with:

In [1]:
import numpy as np
import pandas as pd
diamonds_pdf = pd.read_csv('https://raw.githubusercontent.com/datalab-datasets/file-samples/master/diamonds.csv').drop(columns='Unnamed: 0')
redwine = pd.read_csv("https://raw.githubusercontent.com/datalab-datasets/wine-quality/master/winequality-red.csv", sep = ";")
whitewine = pd.read_csv("https://raw.githubusercontent.com/datalab-datasets/wine-quality/master/winequality-white.csv", sep = ";")

In [2]:
# Create pd.date 
dates = pd.date_range("20190101", periods = 6)
dates

DatetimeIndex(['2019-01-01', '2019-01-02', '2019-01-03', '2019-01-04',
               '2019-01-05', '2019-01-06'],
              dtype='datetime64[ns]', freq='D')

In [3]:
df = pd.DataFrame(np.random.randn(6,4), index = dates, columns = list("ABCD"))
df

Unnamed: 0,A,B,C,D
2019-01-01,0.400426,0.513692,0.637255,0.643269
2019-01-02,-0.138997,0.54704,-0.536065,0.167742
2019-01-03,0.641053,0.148645,-1.493667,0.098185
2019-01-04,-0.748613,0.586315,-2.141866,0.674681
2019-01-05,-0.803075,-0.176691,0.881845,-0.191245
2019-01-06,1.481632,-0.149803,1.235538,0.521548


In [4]:
diamonds_pdf.dtypes

carat      float64
cut         object
color       object
clarity     object
depth      float64
table      float64
price        int64
x          float64
y          float64
z          float64
dtype: object

In [5]:
diamonds_pdf.head()

Unnamed: 0,carat,cut,color,clarity,depth,table,price,x,y,z
0,0.23,Ideal,E,SI2,61.5,55.0,326,3.95,3.98,2.43
1,0.21,Premium,E,SI1,59.8,61.0,326,3.89,3.84,2.31
2,0.23,Good,E,VS1,56.9,65.0,327,4.05,4.07,2.31
3,0.29,Premium,I,VS2,62.4,58.0,334,4.2,4.23,2.63
4,0.31,Good,J,SI2,63.3,58.0,335,4.34,4.35,2.75


In [6]:
diamonds_pdf.tail(3)

Unnamed: 0,carat,cut,color,clarity,depth,table,price,x,y,z
53937,0.7,Very Good,D,SI1,62.8,60.0,2757,5.66,5.68,3.56
53938,0.86,Premium,H,SI2,61.0,58.0,2757,6.15,6.12,3.74
53939,0.75,Ideal,D,SI2,62.2,55.0,2757,5.83,5.87,3.64


In [7]:
diamonds_pdf.index

RangeIndex(start=0, stop=53940, step=1)

In [8]:
diamonds_pdf.columns

Index(['carat', 'cut', 'color', 'clarity', 'depth', 'table', 'price', 'x', 'y',
       'z'],
      dtype='object')

In [9]:
df.to_numpy()

array([[ 0.40042645,  0.51369216,  0.63725487,  0.64326916],
       [-0.13899656,  0.54703973, -0.53606471,  0.16774207],
       [ 0.64105252,  0.14864462, -1.49366691,  0.09818523],
       [-0.74861294,  0.58631455, -2.1418659 ,  0.67468059],
       [-0.80307453, -0.17669084,  0.88184509, -0.19124518],
       [ 1.48163158, -0.14980294,  1.23553823,  0.52154823]])

In [10]:
diamonds_pdf.to_numpy()

array([[0.23, 'Ideal', 'E', ..., 3.95, 3.98, 2.43],
       [0.21, 'Premium', 'E', ..., 3.89, 3.84, 2.31],
       [0.23, 'Good', 'E', ..., 4.05, 4.07, 2.31],
       ...,
       [0.7, 'Very Good', 'D', ..., 5.66, 5.68, 3.56],
       [0.86, 'Premium', 'H', ..., 6.15, 6.12, 3.74],
       [0.75, 'Ideal', 'D', ..., 5.83, 5.87, 3.64]], dtype=object)

In [11]:
diamonds_pdf.describe()

Unnamed: 0,carat,depth,table,price,x,y,z
count,53940.0,53940.0,53940.0,53940.0,53940.0,53940.0,53940.0
mean,0.79794,61.749405,57.457184,3932.799722,5.731157,5.734526,3.538734
std,0.474011,1.432621,2.234491,3989.439738,1.121761,1.142135,0.705699
min,0.2,43.0,43.0,326.0,0.0,0.0,0.0
25%,0.4,61.0,56.0,950.0,4.71,4.72,2.91
50%,0.7,61.8,57.0,2401.0,5.7,5.71,3.53
75%,1.04,62.5,59.0,5324.25,6.54,6.54,4.04
max,5.01,79.0,95.0,18823.0,10.74,58.9,31.8


In [12]:
diamonds_pdf.T

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,53930,53931,53932,53933,53934,53935,53936,53937,53938,53939
carat,0.23,0.21,0.23,0.29,0.31,0.24,0.24,0.26,0.22,0.23,...,0.71,0.71,0.7,0.7,0.72,0.72,0.72,0.7,0.86,0.75
cut,Ideal,Premium,Good,Premium,Good,Very Good,Very Good,Very Good,Fair,Very Good,...,Premium,Premium,Very Good,Very Good,Premium,Ideal,Good,Very Good,Premium,Ideal
color,E,E,E,I,J,J,I,H,E,H,...,E,F,E,E,D,D,D,D,H,D
clarity,SI2,SI1,VS1,VS2,SI2,VVS2,VVS1,SI1,VS2,VS1,...,SI1,SI1,VS2,VS2,SI1,SI1,SI1,SI1,SI2,SI2
depth,61.5,59.8,56.9,62.4,63.3,62.8,62.3,61.9,65.1,59.4,...,60.5,59.8,60.5,61.2,62.7,60.8,63.1,62.8,61,62.2
table,55,61,65,58,58,57,57,55,61,61,...,55,62,59,59,59,57,55,60,58,55
price,326,326,327,334,335,336,336,337,337,338,...,2756,2756,2757,2757,2757,2757,2757,2757,2757,2757
x,3.95,3.89,4.05,4.2,4.34,3.94,3.95,4.07,3.87,4,...,5.79,5.74,5.71,5.69,5.69,5.75,5.69,5.66,6.15,5.83
y,3.98,3.84,4.07,4.23,4.35,3.96,3.98,4.11,3.78,4.05,...,5.74,5.73,5.76,5.72,5.73,5.76,5.75,5.68,6.12,5.87
z,2.43,2.31,2.31,2.63,2.75,2.48,2.47,2.53,2.49,2.39,...,3.49,3.43,3.47,3.49,3.58,3.5,3.61,3.56,3.74,3.64


In [13]:
# Sort by an axis, flip the column names 
diamonds_pdf.sort_index(axis = 1, ascending = False)

Unnamed: 0,z,y,x,table,price,depth,cut,color,clarity,carat
0,2.43,3.98,3.95,55.0,326,61.5,Ideal,E,SI2,0.23
1,2.31,3.84,3.89,61.0,326,59.8,Premium,E,SI1,0.21
2,2.31,4.07,4.05,65.0,327,56.9,Good,E,VS1,0.23
3,2.63,4.23,4.20,58.0,334,62.4,Premium,I,VS2,0.29
4,2.75,4.35,4.34,58.0,335,63.3,Good,J,SI2,0.31
5,2.48,3.96,3.94,57.0,336,62.8,Very Good,J,VVS2,0.24
6,2.47,3.98,3.95,57.0,336,62.3,Very Good,I,VVS1,0.24
7,2.53,4.11,4.07,55.0,337,61.9,Very Good,H,SI1,0.26
8,2.49,3.78,3.87,61.0,337,65.1,Fair,E,VS2,0.22
9,2.39,4.05,4.00,61.0,338,59.4,Very Good,H,VS1,0.23


In [14]:
diamonds_pdf.sort_values(by = "price", ascending = False)

Unnamed: 0,carat,cut,color,clarity,depth,table,price,x,y,z
27749,2.29,Premium,I,VS2,60.8,60.0,18823,8.50,8.47,5.16
27748,2.00,Very Good,G,SI1,63.5,56.0,18818,7.90,7.97,5.04
27747,1.51,Ideal,G,IF,61.7,55.0,18806,7.37,7.41,4.56
27746,2.07,Ideal,G,SI2,62.5,55.0,18804,8.20,8.13,5.11
27745,2.00,Very Good,H,SI1,62.8,57.0,18803,7.95,8.00,5.01
27744,2.29,Premium,I,SI1,61.8,59.0,18797,8.52,8.45,5.24
27742,2.04,Premium,H,SI1,58.1,60.0,18795,8.37,8.28,4.84
27743,2.00,Premium,I,VS1,60.8,59.0,18795,8.13,8.02,4.91
27740,1.71,Premium,F,VS2,62.3,59.0,18791,7.57,7.53,4.70
27741,2.15,Ideal,G,SI2,62.6,54.0,18791,8.29,8.35,5.21


In [15]:
redwine['alcohol']

0        9.4
1        9.8
2        9.8
3        9.8
4        9.4
5        9.4
6        9.4
7       10.0
8        9.5
9       10.5
10       9.2
11      10.5
12       9.9
13       9.1
14       9.2
15       9.2
16      10.5
17       9.3
18       9.0
19       9.2
20       9.4
21       9.7
22       9.5
23       9.4
24       9.7
25       9.3
26       9.5
27       9.5
28       9.4
29       9.8
        ... 
1569    11.5
1570    12.4
1571    11.1
1572     9.5
1573    12.5
1574    10.5
1575    11.8
1576    10.8
1577    11.9
1578    11.3
1579    11.3
1580    11.9
1581    11.3
1582    11.9
1583     9.8
1584    11.6
1585    11.5
1586    11.4
1587    10.9
1588    12.8
1589     9.2
1590    11.6
1591    11.6
1592    11.0
1593     9.5
1594    10.5
1595    11.2
1596    11.0
1597    10.2
1598    11.0
Name: alcohol, Length: 1599, dtype: float64

In [16]:
redwine[3:5]

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
3,11.2,0.28,0.56,1.9,0.075,17.0,60.0,0.998,3.16,0.58,9.8,6
4,7.4,0.7,0.0,1.9,0.076,11.0,34.0,0.9978,3.51,0.56,9.4,5


In [17]:
df['20190102':'20190105']

Unnamed: 0,A,B,C,D
2019-01-02,-0.138997,0.54704,-0.536065,0.167742
2019-01-03,0.641053,0.148645,-1.493667,0.098185
2019-01-04,-0.748613,0.586315,-2.141866,0.674681
2019-01-05,-0.803075,-0.176691,0.881845,-0.191245


In [18]:
####################
df.loc[dates[0]]

A    0.400426
B    0.513692
C    0.637255
D    0.643269
Name: 2019-01-01 00:00:00, dtype: float64

In [19]:
redwine.loc[:, ['alcohol', 'quality']]

Unnamed: 0,alcohol,quality
0,9.4,5
1,9.8,5
2,9.8,5
3,9.8,6
4,9.4,5
5,9.4,5
6,9.4,5
7,10.0,7
8,9.5,7
9,10.5,5


In [20]:
df.loc['20190102':'20190104', ['A', 'B']]

Unnamed: 0,A,B
2019-01-02,-0.138997,0.54704
2019-01-03,0.641053,0.148645
2019-01-04,-0.748613,0.586315


In [21]:
df.loc['20190101', ['A', 'B']]

A    0.400426
B    0.513692
Name: 2019-01-01 00:00:00, dtype: float64

In [22]:
df.loc[dates[0], 'A']

0.40042644665222576

In [23]:
df.at[dates[0], 'A']

0.40042644665222576

In [24]:
redwine.iloc[3]

fixed acidity           11.200
volatile acidity         0.280
citric acid              0.560
residual sugar           1.900
chlorides                0.075
free sulfur dioxide     17.000
total sulfur dioxide    60.000
density                  0.998
pH                       3.160
sulphates                0.580
alcohol                  9.800
quality                  6.000
Name: 3, dtype: float64

In [25]:
redwine.iloc[3:5, 0:2]

Unnamed: 0,fixed acidity,volatile acidity
3,11.2,0.28
4,7.4,0.7


In [26]:
redwine.iloc[[1,2,4], [0,2]]

Unnamed: 0,fixed acidity,citric acid
1,7.8,0.0
2,7.8,0.04
4,7.4,0.0


In [27]:
redwine.iloc[1:3, :]

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
1,7.8,0.88,0.0,2.6,0.098,25.0,67.0,0.9968,3.2,0.68,9.8,5
2,7.8,0.76,0.04,2.3,0.092,15.0,54.0,0.997,3.26,0.65,9.8,5


In [28]:
redwine.iloc[:, 1:3]

Unnamed: 0,volatile acidity,citric acid
0,0.700,0.00
1,0.880,0.00
2,0.760,0.04
3,0.280,0.56
4,0.700,0.00
5,0.660,0.00
6,0.600,0.06
7,0.650,0.00
8,0.580,0.02
9,0.500,0.36


In [29]:
redwine.iloc[1,1]

0.88

In [30]:
redwine.iat[1,1]

0.88

In [31]:
redwine[redwine.alcohol > 12]

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
45,4.6,0.520,0.15,2.10,0.054,8.0,65.0,0.99340,3.90,0.56,13.100000,4
95,4.7,0.600,0.17,2.30,0.058,17.0,106.0,0.99320,3.85,0.60,12.900000,6
131,5.6,0.500,0.09,2.30,0.049,17.0,99.0,0.99370,3.63,0.63,13.000000,5
132,5.6,0.500,0.09,2.30,0.049,17.0,99.0,0.99370,3.63,0.63,13.000000,5
142,5.2,0.340,0.00,1.80,0.050,27.0,63.0,0.99160,3.68,0.79,14.000000,6
144,5.2,0.340,0.00,1.80,0.050,27.0,63.0,0.99160,3.68,0.79,14.000000,6
198,5.4,0.835,0.08,1.20,0.046,13.0,93.0,0.99240,3.57,0.85,13.000000,7
210,9.7,0.530,0.60,2.00,0.039,5.0,19.0,0.99585,3.30,0.86,12.400000,6
230,5.2,0.480,0.04,1.60,0.054,19.0,106.0,0.99270,3.54,0.62,12.200000,7
267,7.9,0.350,0.46,3.60,0.078,15.0,37.0,0.99730,3.35,0.86,12.800000,8


In [32]:
redwine[redwine < 0.2]

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,,,0.00,,0.076,,,,,,,
1,,,0.00,,0.098,,,,,,,
2,,,0.04,,0.092,,,,,,,
3,,,,,0.075,,,,,,,
4,,,0.00,,0.076,,,,,,,
5,,,0.00,,0.075,,,,,,,
6,,,0.06,,0.069,,,,,,,
7,,,0.00,,0.065,,,,,,,
8,,,0.02,,0.073,,,,,,,
9,,,,,0.071,,,,,,,


In [33]:
redwine[redwine['quality'].isin([3, 10])]

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
459,11.6,0.58,0.66,2.2,0.074,10.0,47.0,1.0008,3.25,0.57,9.0,3
517,10.4,0.61,0.49,2.1,0.2,5.0,16.0,0.9994,3.16,0.63,8.4,3
690,7.4,1.185,0.0,4.25,0.097,5.0,14.0,0.9966,3.63,0.54,10.7,3
832,10.4,0.44,0.42,1.5,0.145,34.0,48.0,0.99832,3.38,0.86,9.9,3
899,8.3,1.02,0.02,3.4,0.084,6.0,11.0,0.99892,3.48,0.49,11.0,3
1299,7.6,1.58,0.0,2.1,0.137,5.0,9.0,0.99476,3.5,0.4,10.9,3
1374,6.8,0.815,0.0,1.2,0.267,16.0,29.0,0.99471,3.32,0.51,9.8,3
1469,7.3,0.98,0.05,2.1,0.061,20.0,49.0,0.99705,3.31,0.55,9.7,3
1478,7.1,0.875,0.05,5.7,0.082,3.0,14.0,0.99808,3.4,0.52,10.2,3
1505,6.7,0.76,0.02,1.8,0.078,6.0,12.0,0.996,3.55,0.63,9.95,3


In [34]:
df.at[dates[0], 'A'] = 0

In [35]:
df.iat[0, 1] = 0 

In [36]:
df.loc[:, 'D'] = np.array([5] * len(df))

In [37]:
df

Unnamed: 0,A,B,C,D
2019-01-01,0.0,0.0,0.637255,5
2019-01-02,-0.138997,0.54704,-0.536065,5
2019-01-03,0.641053,0.148645,-1.493667,5
2019-01-04,-0.748613,0.586315,-2.141866,5
2019-01-05,-0.803075,-0.176691,0.881845,5
2019-01-06,1.481632,-0.149803,1.235538,5


In [38]:
df2 = df.copy()
df2[df2 > 0 ] = -df2

In [39]:
df2

Unnamed: 0,A,B,C,D
2019-01-01,0.0,0.0,-0.637255,-5
2019-01-02,-0.138997,-0.54704,-0.536065,-5
2019-01-03,-0.641053,-0.148645,-1.493667,-5
2019-01-04,-0.748613,-0.586315,-2.141866,-5
2019-01-05,-0.803075,-0.176691,-0.881845,-5
2019-01-06,-1.481632,-0.149803,-1.235538,-5


In [40]:
# reindex defines the new index series and keeps only the values existing with that index 
df1 = df.reindex(index = dates[0:4], columns = list(df.columns) + ['E'])
df1.loc[dates[0]:dates[1], 'E'] = 1
df1

Unnamed: 0,A,B,C,D,E
2019-01-01,0.0,0.0,0.637255,5,1.0
2019-01-02,-0.138997,0.54704,-0.536065,5,1.0
2019-01-03,0.641053,0.148645,-1.493667,5,
2019-01-04,-0.748613,0.586315,-2.141866,5,


In [41]:
df1.dropna(how = "any")

Unnamed: 0,A,B,C,D,E
2019-01-01,0.0,0.0,0.637255,5,1.0
2019-01-02,-0.138997,0.54704,-0.536065,5,1.0


In [42]:
df1.fillna(value = 5)

Unnamed: 0,A,B,C,D,E
2019-01-01,0.0,0.0,0.637255,5,1.0
2019-01-02,-0.138997,0.54704,-0.536065,5,1.0
2019-01-03,0.641053,0.148645,-1.493667,5,5.0
2019-01-04,-0.748613,0.586315,-2.141866,5,5.0


In [43]:
pd.isna(df1)

Unnamed: 0,A,B,C,D,E
2019-01-01,False,False,False,False,False
2019-01-02,False,False,False,False,False
2019-01-03,False,False,False,False,True
2019-01-04,False,False,False,False,True


In [45]:
whitewine.mean()

fixed acidity             6.854788
volatile acidity          0.278241
citric acid               0.334192
residual sugar            6.391415
chlorides                 0.045772
free sulfur dioxide      35.308085
total sulfur dioxide    138.360657
density                   0.994027
pH                        3.188267
sulphates                 0.489847
alcohol                  10.514267
quality                   5.877909
dtype: float64

In [46]:
whitewine.mean(1)

0       21.885500
1       14.572750
2       13.627092
3       22.482800
4       22.482800
5       13.627092
6       16.664158
7       21.885500
8       14.572750
9       15.746483
10       8.816983
11      13.235808
12      10.107667
13      18.679600
14      21.748350
15      14.322783
16      13.096567
17      11.506517
18      18.269558
19      16.725792
20      11.506517
21      12.794933
22      16.306000
23      18.486475
24      15.626425
25      28.230125
26      17.535417
27      17.568925
28      15.047367
29      15.417800
          ...    
4868    16.552437
4869    10.566522
4870    16.439407
4871    12.372892
4872    22.308185
4873    15.466703
4874    14.223402
4875    12.028121
4876    15.339943
4877     9.972905
4878     9.397278
4879    22.805356
4880    22.805356
4881    15.770867
4882    19.051570
4883    19.798720
4884    23.066077
4885    22.806745
4886    16.904140
4887     7.881023
4888    16.033775
4889    15.762533
4890    13.021282
4891    15.621562
4892    14

In [47]:
whitewine.apply(np.cumsum)

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.00,0.270,0.36,20.70,0.045,45.0,170.0,1.00100,3.00,0.45,8.800000,6
1,13.30,0.570,0.70,22.30,0.094,59.0,302.0,1.99500,6.30,0.94,18.300000,12
2,21.40,0.850,1.10,29.20,0.144,89.0,399.0,2.99010,9.56,1.38,28.400000,18
3,28.60,1.080,1.42,37.70,0.202,136.0,585.0,3.98570,12.75,1.78,38.300000,24
4,35.80,1.310,1.74,46.20,0.260,183.0,771.0,4.98130,15.94,2.18,48.200000,30
5,43.90,1.590,2.14,53.10,0.310,213.0,868.0,5.97640,19.20,2.62,58.300000,36
6,50.10,1.910,2.30,60.10,0.355,243.0,1004.0,6.97130,22.38,3.09,67.900000,42
7,57.10,2.180,2.66,80.80,0.400,288.0,1174.0,7.97230,25.38,3.54,76.700000,48
8,63.40,2.480,3.00,82.40,0.449,302.0,1306.0,8.96630,28.68,4.03,86.200000,54
9,71.50,2.700,3.43,83.90,0.493,330.0,1435.0,9.96010,31.90,4.48,97.200000,60


In [48]:
redwine.apply(lambda x: x.max() - x.min())

fixed acidity            11.30000
volatile acidity          1.46000
citric acid               1.00000
residual sugar           14.60000
chlorides                 0.59900
free sulfur dioxide      71.00000
total sulfur dioxide    283.00000
density                   0.01362
pH                        1.27000
sulphates                 1.67000
alcohol                   6.50000
quality                   5.00000
dtype: float64

```
In [63]: s = pd.Series([1, 3, 5, np.nan, 6, 8], index=dates).shift(2)

In [64]: s
Out[64]: 
2013-01-01    NaN
2013-01-02    NaN
2013-01-03    1.0
2013-01-04    3.0
2013-01-05    5.0
2013-01-06    NaN
Freq: D, dtype: float64

In [65]: df.sub(s, axis='index')
Out[65]: 
                   A         B         C    D    F
2013-01-01       NaN       NaN       NaN  NaN  NaN
2013-01-02       NaN       NaN       NaN  NaN  NaN
2013-01-03 -1.861849 -3.104569 -1.494929  4.0  1.0
2013-01-04 -2.278445 -3.706771 -4.039575  2.0  0.0
2013-01-05 -5.424972 -4.432980 -4.723768  0.0 -1.0
2013-01-06       NaN       NaN       NaN  NaN  NaN
```

```
In [68]: s = pd.Series(np.random.randint(0, 7, size=10))

In [69]: s
Out[69]: 
0    4
1    2
2    1
3    2
4    6
5    4
6    4
7    6
8    4
9    4
dtype: int64

In [70]: s.value_counts()
Out[70]: 
4    5
6    2
2    2
1    1
dtype: int64
```

In [58]:
diamonds_pdf.cut.lower()

AttributeError: 'Series' object has no attribute 'lower'

In [60]:
pieces = [redwine, whitewine]
pd.concat(pieces)

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.4,0.700,0.00,1.90,0.076,11.0,34.0,0.99780,3.51,0.56,9.400000,5
1,7.8,0.880,0.00,2.60,0.098,25.0,67.0,0.99680,3.20,0.68,9.800000,5
2,7.8,0.760,0.04,2.30,0.092,15.0,54.0,0.99700,3.26,0.65,9.800000,5
3,11.2,0.280,0.56,1.90,0.075,17.0,60.0,0.99800,3.16,0.58,9.800000,6
4,7.4,0.700,0.00,1.90,0.076,11.0,34.0,0.99780,3.51,0.56,9.400000,5
5,7.4,0.660,0.00,1.80,0.075,13.0,40.0,0.99780,3.51,0.56,9.400000,5
6,7.9,0.600,0.06,1.60,0.069,15.0,59.0,0.99640,3.30,0.46,9.400000,5
7,7.3,0.650,0.00,1.20,0.065,15.0,21.0,0.99460,3.39,0.47,10.000000,7
8,7.8,0.580,0.02,2.00,0.073,9.0,18.0,0.99680,3.36,0.57,9.500000,7
9,7.5,0.500,0.36,6.10,0.071,17.0,102.0,0.99780,3.35,0.80,10.500000,5


In [61]:
whitewine.append(redwine, ignore_index = True)

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,7.0,0.270,0.36,20.70,0.045,45.0,170.0,1.00100,3.00,0.45,8.8,6
1,6.3,0.300,0.34,1.60,0.049,14.0,132.0,0.99400,3.30,0.49,9.5,6
2,8.1,0.280,0.40,6.90,0.050,30.0,97.0,0.99510,3.26,0.44,10.1,6
3,7.2,0.230,0.32,8.50,0.058,47.0,186.0,0.99560,3.19,0.40,9.9,6
4,7.2,0.230,0.32,8.50,0.058,47.0,186.0,0.99560,3.19,0.40,9.9,6
5,8.1,0.280,0.40,6.90,0.050,30.0,97.0,0.99510,3.26,0.44,10.1,6
6,6.2,0.320,0.16,7.00,0.045,30.0,136.0,0.99490,3.18,0.47,9.6,6
7,7.0,0.270,0.36,20.70,0.045,45.0,170.0,1.00100,3.00,0.45,8.8,6
8,6.3,0.300,0.34,1.60,0.049,14.0,132.0,0.99400,3.30,0.49,9.5,6
9,8.1,0.220,0.43,1.50,0.044,28.0,129.0,0.99380,3.22,0.45,11.0,6


In [62]:
whitewine.groupby('alcohol').sum()

Unnamed: 0_level_0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,quality
alcohol,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
8.000000,8.7,0.405,0.44,6.05,0.074,153.0,316.0,1.990200,6.76,0.86,8
8.400000,18.8,0.990,0.58,12.50,0.160,74.0,317.0,2.983940,9.38,1.42,14
8.500000,62.7,2.770,3.59,99.95,0.468,399.0,1587.0,8.985220,28.61,4.57,50
8.600000,165.6,6.760,10.13,284.90,1.166,1053.0,3980.0,22.963340,71.64,11.38,119
8.700000,552.2,22.635,35.71,1010.10,4.050,3352.0,12761.0,77.858940,240.14,35.55,432
8.800000,739.0,28.385,37.69,1484.20,5.470,4556.0,17354.5,106.848320,334.51,50.57,610
8.900000,662.9,25.625,34.70,1120.25,4.818,4792.0,16701.0,94.792510,297.17,46.39,543
9.000000,1267.8,52.575,61.18,2131.70,10.308,7386.0,30584.0,184.508590,575.54,86.10,1035
9.100000,1002.8,41.590,50.61,1546.55,8.354,5985.5,24466.5,143.607420,453.54,70.84,806
9.200000,1377.0,59.045,70.31,1846.15,11.913,7324.5,30651.0,198.304350,623.31,94.13,1071


In [64]:
whitewine.columns

Index(['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar',
       'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density',
       'pH', 'sulphates', 'alcohol', 'quality'],
      dtype='object')

In [67]:
whitewine.groupby(["alcohol", "quality"]).sum()

Unnamed: 0_level_0,Unnamed: 1_level_0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates
alcohol,quality,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
8.000000,3,4.2,0.215,0.23,5.10,0.041,64.0,157.0,0.996880,3.42,0.44
8.000000,5,4.5,0.190,0.21,0.95,0.033,89.0,159.0,0.993320,3.34,0.42
8.400000,4,6.4,0.370,0.12,5.90,0.056,6.0,91.0,0.995360,3.06,0.46
8.400000,5,12.4,0.620,0.46,6.60,0.104,68.0,226.0,1.988580,6.32,0.96
8.500000,3,9.1,0.590,0.38,1.60,0.066,34.0,182.0,0.996800,3.23,0.38
8.500000,5,20.2,1.070,1.00,30.55,0.173,124.0,501.0,2.993040,9.57,1.33
8.500000,6,25.6,0.930,1.75,55.20,0.187,200.0,761.0,3.995380,12.57,2.10
8.500000,8,7.8,0.180,0.46,12.60,0.042,41.0,143.0,1.000000,3.24,0.76
8.600000,4,28.2,1.430,1.97,39.10,0.182,209.0,685.0,3.989340,12.47,1.82
8.600000,5,99.8,4.180,6.38,183.80,0.684,629.0,2520.0,13.980100,43.48,6.65


In [69]:
stacked = diamonds_pdf.stack()
stacked

0      carat           0.23
       cut            Ideal
       color              E
       clarity          SI2
       depth           61.5
       table             55
       price            326
       x               3.95
       y               3.98
       z               2.43
1      carat           0.21
       cut          Premium
       color              E
       clarity          SI1
       depth           59.8
       table             61
       price            326
       x               3.89
       y               3.84
       z               2.31
2      carat           0.23
       cut             Good
       color              E
       clarity          VS1
       depth           56.9
       table             65
       price            327
       x               4.05
       y               4.07
       z               2.31
                    ...    
53937  carat            0.7
       cut        Very Good
       color              D
       clarity          SI1
       depth        

In [70]:
stacked.unstack()

Unnamed: 0,carat,cut,color,clarity,depth,table,price,x,y,z
0,0.23,Ideal,E,SI2,61.5,55,326,3.95,3.98,2.43
1,0.21,Premium,E,SI1,59.8,61,326,3.89,3.84,2.31
2,0.23,Good,E,VS1,56.9,65,327,4.05,4.07,2.31
3,0.29,Premium,I,VS2,62.4,58,334,4.2,4.23,2.63
4,0.31,Good,J,SI2,63.3,58,335,4.34,4.35,2.75
5,0.24,Very Good,J,VVS2,62.8,57,336,3.94,3.96,2.48
6,0.24,Very Good,I,VVS1,62.3,57,336,3.95,3.98,2.47
7,0.26,Very Good,H,SI1,61.9,55,337,4.07,4.11,2.53
8,0.22,Fair,E,VS2,65.1,61,337,3.87,3.78,2.49
9,0.23,Very Good,H,VS1,59.4,61,338,4,4.05,2.39


In [71]:
stacked.unstack(1)

Unnamed: 0,carat,cut,color,clarity,depth,table,price,x,y,z
0,0.23,Ideal,E,SI2,61.5,55,326,3.95,3.98,2.43
1,0.21,Premium,E,SI1,59.8,61,326,3.89,3.84,2.31
2,0.23,Good,E,VS1,56.9,65,327,4.05,4.07,2.31
3,0.29,Premium,I,VS2,62.4,58,334,4.2,4.23,2.63
4,0.31,Good,J,SI2,63.3,58,335,4.34,4.35,2.75
5,0.24,Very Good,J,VVS2,62.8,57,336,3.94,3.96,2.48
6,0.24,Very Good,I,VVS1,62.3,57,336,3.95,3.98,2.47
7,0.26,Very Good,H,SI1,61.9,55,337,4.07,4.11,2.53
8,0.22,Fair,E,VS2,65.1,61,337,3.87,3.78,2.49
9,0.23,Very Good,H,VS1,59.4,61,338,4,4.05,2.39


In [72]:
stacked.unstack(0)

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,53930,53931,53932,53933,53934,53935,53936,53937,53938,53939
carat,0.23,0.21,0.23,0.29,0.31,0.24,0.24,0.26,0.22,0.23,...,0.71,0.71,0.7,0.7,0.72,0.72,0.72,0.7,0.86,0.75
cut,Ideal,Premium,Good,Premium,Good,Very Good,Very Good,Very Good,Fair,Very Good,...,Premium,Premium,Very Good,Very Good,Premium,Ideal,Good,Very Good,Premium,Ideal
color,E,E,E,I,J,J,I,H,E,H,...,E,F,E,E,D,D,D,D,H,D
clarity,SI2,SI1,VS1,VS2,SI2,VVS2,VVS1,SI1,VS2,VS1,...,SI1,SI1,VS2,VS2,SI1,SI1,SI1,SI1,SI2,SI2
depth,61.5,59.8,56.9,62.4,63.3,62.8,62.3,61.9,65.1,59.4,...,60.5,59.8,60.5,61.2,62.7,60.8,63.1,62.8,61,62.2
table,55,61,65,58,58,57,57,55,61,61,...,55,62,59,59,59,57,55,60,58,55
price,326,326,327,334,335,336,336,337,337,338,...,2756,2756,2757,2757,2757,2757,2757,2757,2757,2757
x,3.95,3.89,4.05,4.2,4.34,3.94,3.95,4.07,3.87,4,...,5.79,5.74,5.71,5.69,5.69,5.75,5.69,5.66,6.15,5.83
y,3.98,3.84,4.07,4.23,4.35,3.96,3.98,4.11,3.78,4.05,...,5.74,5.73,5.76,5.72,5.73,5.76,5.75,5.68,6.12,5.87
z,2.43,2.31,2.31,2.63,2.75,2.48,2.47,2.53,2.49,2.39,...,3.49,3.43,3.47,3.49,3.58,3.5,3.61,3.56,3.74,3.64


In [74]:
pd.pivot_table(redwine, index = ["fixed acidity", "volatile acidity", "citric acid", "residual sugar"], 
              columns = ["pH"])

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,Unnamed: 3_level_0,alcohol,alcohol,alcohol,alcohol,alcohol,alcohol,alcohol,alcohol,alcohol,alcohol,...,total sulfur dioxide,total sulfur dioxide,total sulfur dioxide,total sulfur dioxide,total sulfur dioxide,total sulfur dioxide,total sulfur dioxide,total sulfur dioxide,total sulfur dioxide,total sulfur dioxide
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,pH,2.74,2.86,2.87,2.88,2.89,2.90,2.92,2.93,2.94,2.95,...,3.69,3.70,3.71,3.72,3.74,3.75,3.78,3.85,3.90,4.01
fixed acidity,volatile acidity,citric acid,residual sugar,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2,Unnamed: 8_level_2,Unnamed: 9_level_2,Unnamed: 10_level_2,Unnamed: 11_level_2,Unnamed: 12_level_2,Unnamed: 13_level_2,Unnamed: 14_level_2,Unnamed: 15_level_2,Unnamed: 16_level_2,Unnamed: 17_level_2,Unnamed: 18_level_2,Unnamed: 19_level_2,Unnamed: 20_level_2,Unnamed: 21_level_2,Unnamed: 22_level_2,Unnamed: 23_level_2,Unnamed: 24_level_2
4.6,0.520,0.15,2.10,,,,,,,,,,,...,,,,,,,,,65.0,
4.7,0.600,0.17,2.30,,,,,,,,,,,...,,,,,,,,106.0,,
4.9,0.420,0.00,2.10,,,,,,,,,,,...,,,42.0,,,,,,,
5.0,0.380,0.01,1.60,,,,,,,,,,,...,,60.0,,,,,,,,
5.0,0.400,0.50,4.30,,,,,,,,,,,...,,,,,,,,,,
5.0,0.420,0.24,2.00,,,,,,,,,,,...,,,,50.0,,,,,,
5.0,0.740,0.00,1.20,,,,,,,,,,,...,,,,,,,,,,46.0
5.0,1.020,0.04,1.40,,,,,,,,,,,...,,,,,,85.0,,,,
5.0,1.040,0.24,1.60,,,,,,,,,,,...,,,,,96.0,,,,,
5.1,0.420,0.00,1.80,,,,,,,,,,,...,,,,,,,,,,


In [76]:
left = pd.DataFrame({'key': ['foo', 'foo'], 'lval': [1, 2]})
right = pd.DataFrame({'key': ['foo', 'foo'], 'rval': [4, 5]})

pd.merge(left, right, on='key')

Unnamed: 0,key,lval,rval
0,foo,1,4
1,foo,1,5
2,foo,2,4
3,foo,2,5


In [77]:
left = pd.DataFrame({'key': ['foo', 'bar'], 'lval': [1, 2]})
right = pd.DataFrame({'key': ['foo', 'bar'], 'rval': [4, 5]})

pd.merge(left, right, on='key')

Unnamed: 0,key,lval,rval
0,foo,1,4
1,bar,2,5


The end. 