In [1]:
import pandas as pd
import numpy as np

In [2]:
# Median income per locality: locality name -> integer amount.
# Built from (name, value) pairs; insertion order is preserved.
median_income_dict = dict(
    [
        ("Alexandria", 80847),
        ("Amherst", 44757),
        ("Arlington", 94880),
        ("Augusta", 50612),
        ("Bedford", 54110),
        ("Botetourt", 64724),
        ("Fairfax", 105416),
        ("Falls Church", 114409),
        ("Prince William", 91098),
    ]
)

# Echo the mapping as the cell's output.
median_income_dict

{'Alexandria': 80847,
 'Amherst': 44757,
 'Arlington': 94880,
 'Augusta': 50612,
 'Bedford': 54110,
 'Botetourt': 64724,
 'Fairfax': 105416,
 'Falls Church': 114409,
 'Prince William': 91098}

In [3]:
# Plain dict lookup by key, as a baseline for the Series lookups later on.
median_income_dict['Amherst']

44757

In [4]:
# Turn the dict into a labelled Series: keys become the index, values the data.
# A 32-bit integer dtype is requested explicitly.
median_income_series = pd.Series(data=median_income_dict, dtype="int32")

# Show the full Series.
median_income_series

Alexandria         80847
Amherst            44757
Arlington          94880
Augusta            50612
Bedford            54110
Botetourt          64724
Fairfax           105416
Falls Church      114409
Prince William     91098
dtype: int32

In [5]:
# Preview the first rows; with no argument head() shows five.
median_income_series.head()

Alexandria    80847
Amherst       44757
Arlington     94880
Augusta       50612
Bedford       54110
dtype: int32

In [6]:
# Preview only the first three rows.
median_income_series.head(3)

Alexandria    80847
Amherst       44757
Arlington     94880
dtype: int32

In [7]:
# Preview the last rows; with no argument tail() shows five.
median_income_series.tail()

Bedford            54110
Botetourt          64724
Fairfax           105416
Falls Church      114409
Prince William     91098
dtype: int32

In [8]:
# Preview only the last three rows.
median_income_series.tail(3)

Fairfax           105416
Falls Church      114409
Prince William     91098
dtype: int32

In [9]:
# The row labels (here, the dict keys the Series was built from).
median_income_series.index

Index(['Alexandria', 'Amherst', 'Arlington', 'Augusta', 'Bedford', 'Botetourt',
       'Fairfax', 'Falls Church', 'Prince William'],
      dtype='object')

In [10]:
# The data without labels, exposed as a NumPy array.
median_income_series.values

array([ 80847,  44757,  94880,  50612,  54110,  64724, 105416, 114409,
        91098])

In [11]:
# Keep the NumPy array exposed by .values and inspect its element dtype,
# which matches the dtype requested when the Series was created.
income_values = median_income_series.values
income_values.dtype

dtype('int32')

In [15]:
# A single label yields a NumPy scalar, not a Series.
type(median_income_series["Amherst"])

numpy.int32

In [14]:
# A list of labels (even a one-element list) yields a Series.
type(median_income_series[ ["Amherst"] ])

pandas.core.series.Series

In [16]:
median_income_series["Amherst"] # a single label returns one scalar value from the Series

44757

In [17]:
median_income_series[ ["Amherst", "Botetourt" ] ] # a list of labels returns a Series containing just those labels

Amherst      44757
Botetourt    64724
dtype: int32

In [18]:
median_income_series["Amherst":"Bedford"] # a label slice returns a Series; both endpoints are included

Amherst      44757
Arlington    94880
Augusta      50612
Bedford      54110
dtype: int32

In [19]:
# Show the whole Series again as a reference for the positional lookups that follow.
median_income_series

Alexandria         80847
Amherst            44757
Arlington          94880
Augusta            50612
Bedford            54110
Botetourt          64724
Fairfax           105416
Falls Church      114409
Prince William     91098
dtype: int32

In [20]:
# Fetch the second element by position. Use .iloc explicitly: a bare integer
# key on this string-labelled Series only works through a positional fallback
# that pandas has deprecated (integer keys are meant to be treated as labels).
median_income_series.iloc[1]

44757

In [21]:
# An integer slice is positional here and excludes the stop: positions 1 and 2.
median_income_series[1:3]

Amherst      44757
Arlington    94880
dtype: int32

In [22]:
# Pick the elements at positions 1 and 4. Use .iloc explicitly: a list of
# integers passed to [] on this string-labelled Series relies on the same
# deprecated positional fallback as a single integer key.
median_income_series.iloc[[1, 4]]

Amherst    44757
Bedford    54110
dtype: int32

In [23]:
# .iloc is explicit position-based access (0-based).
median_income_series.iloc[1]

44757

In [24]:
# .loc is explicit label-based access.
median_income_series.loc['Amherst']

44757

In [25]:
# A Series is one-dimensional, so shape is a 1-tuple holding the element count.
median_income_series.shape

(9,)

In [26]:
# Household count per locality: locality name -> integer count.
households_dict = dict(
    [
        ("Alexandria", 68082),
        ("Amherst", 12560),
        ("Arlington", 98050),
        ("Augusta", 28516),
        ("Bedford", 27465),
        ("Botetourt", 13126),
        ("Fairfax", 391627),
        ("Falls Church", 5101),
        ("Prince William", 130785),
    ]
)

# No dtype is given, so pandas infers the integer dtype from the values.
households_series = pd.Series(data=households_dict)

households_series

Alexandria         68082
Amherst            12560
Arlington          98050
Augusta            28516
Bedford            27465
Botetourt          13126
Fairfax           391627
Falls Church        5101
Prince William    130785
dtype: int64

In [28]:
# Assemble a two-column frame from the two Series. Both share the same
# labels, which become the row index; the keyword names become the columns.
counties = pd.DataFrame(
    dict(median_income=median_income_series, households=households_series)
)

counties

Unnamed: 0,median_income,households
Alexandria,80847,68082
Amherst,44757,12560
Arlington,94880,98050
Augusta,50612,28516
Bedford,54110,27465
Botetourt,64724,13126
Fairfax,105416,391627
Falls Church,114409,5101
Prince William,91098,130785


In [29]:
# Column labels of the frame.
counties.columns

Index(['median_income', 'households'], dtype='object')

In [30]:
# Row labels of the frame (inherited from the Series it was built from).
counties.index

Index(['Alexandria', 'Amherst', 'Arlington', 'Augusta', 'Bedford', 'Botetourt',
       'Fairfax', 'Falls Church', 'Prince William'],
      dtype='object')

In [31]:
# The frame's data as a single 2-D NumPy array with one common dtype.
counties.values

array([[ 80847,  68082],
       [ 44757,  12560],
       [ 94880,  98050],
       [ 50612,  28516],
       [ 54110,  27465],
       [ 64724,  13126],
       [105416, 391627],
       [114409,   5101],
       [ 91098, 130785]], dtype=int64)

In [32]:
# (rows, columns) of the array returned by .values.
counties.values.shape

(9, 2)

In [34]:
# Request the frame's array twice to see whether each call returns the same object.
a = counties.values
b = counties.values

print(a is b)  # identity check: are a and b the very same array object?
print(a.shape, b.shape, sep="\n")

False
(9, 2)
(9, 2)


In [36]:
# Overwrite one element of the first array and read it back.
a[0,0] = 999
a[0,0]

999

In [38]:
# The second array still holds the original value, so writing to `a` did not affect `b`.
b[0,0]

80847

In [39]:
# Relabel the columns in place. The new names are applied positionally,
# so the list must follow the current column order.
counties.columns = ["Median Income", "Households"]

counties

Unnamed: 0,Median Income,Households
Alexandria,80847,68082
Amherst,44757,12560
Arlington,94880,98050
Augusta,50612,28516
Bedford,54110,27465
Botetourt,64724,13126
Fairfax,105416,391627
Falls Church,114409,5101
Prince William,91098,130785


In [41]:
# stack() folds the column labels into an inner index level, giving a
# Series indexed by (county, column name).
counties2 = counties.stack()

counties2

Alexandria      Median Income     80847
                Households        68082
Amherst         Median Income     44757
                Households        12560
Arlington       Median Income     94880
                Households        98050
Augusta         Median Income     50612
                Households        28516
Bedford         Median Income     54110
                Households        27465
Botetourt       Median Income     64724
                Households        13126
Fairfax         Median Income    105416
                Households       391627
Falls Church    Median Income    114409
                Households         5101
Prince William  Median Income     91098
                Households       130785
dtype: int64

In [43]:
# Confirm the stacked result is a Series and inspect its two-level index.
print(type(counties2))
counties2.index

<class 'pandas.core.series.Series'>


MultiIndex([(    'Alexandria', 'Median Income'),
            (    'Alexandria',    'Households'),
            (       'Amherst', 'Median Income'),
            (       'Amherst',    'Households'),
            (     'Arlington', 'Median Income'),
            (     'Arlington',    'Households'),
            (       'Augusta', 'Median Income'),
            (       'Augusta',    'Households'),
            (       'Bedford', 'Median Income'),
            (       'Bedford',    'Households'),
            (     'Botetourt', 'Median Income'),
            (     'Botetourt',    'Households'),
            (       'Fairfax', 'Median Income'),
            (       'Fairfax',    'Households'),
            (  'Falls Church', 'Median Income'),
            (  'Falls Church',    'Households'),
            ('Prince William', 'Median Income'),
            ('Prince William',    'Households')],
           )

In [44]:
# unstack() with no argument pivots the innermost index level (the column
# names) back into columns, restoring the county-by-column frame.
# Note: this rebinds counties2, so re-running the cell changes the result.
counties2 = counties2.unstack()

counties2

Unnamed: 0,Median Income,Households
Alexandria,80847,68082
Amherst,44757,12560
Arlington,94880,98050
Augusta,50612,28516
Bedford,54110,27465
Botetourt,64724,13126
Fairfax,105416,391627
Falls Church,114409,5101
Prince William,91098,130785


In [45]:
# Unstacking the frame's only index level leaves nothing to use as rows, so
# the result is a Series indexed by (column name, county).
# Note: this rebinds counties2, so re-running the cell changes the result.
counties2 = counties2.unstack(0)

counties2

Median Income  Alexandria         80847
               Amherst            44757
               Arlington          94880
               Augusta            50612
               Bedford            54110
               Botetourt          64724
               Fairfax           105416
               Falls Church      114409
               Prince William     91098
Households     Alexandria         68082
               Amherst            12560
               Arlington          98050
               Augusta            28516
               Bedford            27465
               Botetourt          13126
               Fairfax           391627
               Falls Church        5101
               Prince William    130785
dtype: int64

In [47]:
# Look up one value in the two-level Series with a single tuple key
# (outer level, inner level) instead of two chained [] lookups, which would
# build an intermediate Series just to index it again.
counties2.loc[("Median Income", "Amherst")]

44757

In [48]:
# unstack(0) pivots the outer index level (the column names) into columns,
# giving the county-by-column frame again.
# Note: this rebinds counties2, so re-running the cell changes the result.
counties2 = counties2.unstack(0)

counties2

Unnamed: 0,Median Income,Households
Alexandria,80847,68082
Amherst,44757,12560
Arlington,94880,98050
Augusta,50612,28516
Bedford,54110,27465
Botetourt,64724,13126
Fairfax,105416,391627
Falls Church,114409,5101
Prince William,91098,130785


In [50]:
# The saved output for this cell is a frame with the column names as rows
# and the counties as columns.
# NOTE(review): starting from the county-by-column frame displayed by the
# previous cell, a single unstack() would presumably return a Series rather
# than this frame. The execution counter also skips a number just before this
# cell, so an extra unstack may have run in between - confirm with
# Restart & Run All.
counties2 = counties2.unstack()

counties2

Unnamed: 0,Alexandria,Amherst,Arlington,Augusta,Bedford,Botetourt,Fairfax,Falls Church,Prince William
Median Income,80847,44757,94880,50612,54110,64724,105416,114409,91098
Households,68082,12560,98050,28516,27465,13126,391627,5101,130785


In [51]:
# Region of each county, keyed by county label. Assigning a Series aligns
# on the row labels, so the result does not depend on the current row order
# of `counties` (a bare positional list silently would).
region_by_county = {
    "Alexandria": "Northern",
    "Amherst": "Central",
    "Arlington": "Northern",
    "Augusta": "Central",
    "Bedford": "Central",
    "Botetourt": "Southwestern",
    "Fairfax": "Northern",
    "Falls Church": "Northern",
    "Prince William": "Northern",
}
counties["Region"] = pd.Series(region_by_county)

# Label alignment leaves NaN for any county missing from the mapping; fail loudly instead.
assert counties["Region"].notna().all(), "county without a region mapping"

# The frame now mixes integer and string columns.
counties.dtypes

Median Income     int32
Households        int64
Region           object
dtype: object

In [52]:
# Show the frame with the new Region column.
counties

Unnamed: 0,Median Income,Households,Region
Alexandria,80847,68082,Northern
Amherst,44757,12560,Central
Arlington,94880,98050,Northern
Augusta,50612,28516,Central
Bedford,54110,27465,Central
Botetourt,64724,13126,Southwestern
Fairfax,105416,391627,Northern
Falls Church,114409,5101,Northern
Prince William,91098,130785,Northern


In [53]:
# Derived column: element-wise product of median income and household count.
# One operand is int32 and the other int64, so the product is computed in
# 64-bit; several results exceed the 32-bit integer range.
income = counties["Median Income"]
counties["Total Household Income"] = income.mul(counties["Households"])
del income  # keep the notebook namespace unchanged

counties

Unnamed: 0,Median Income,Households,Region,Total Household Income
Alexandria,80847,68082,Northern,5504225454
Amherst,44757,12560,Central,562147920
Arlington,94880,98050,Northern,9302984000
Augusta,50612,28516,Central,1443251792
Bedford,54110,27465,Central,1486131150
Botetourt,64724,13126,Southwestern,849567224
Fairfax,105416,391627,Northern,41283751832
Falls Church,114409,5101,Northern,583600309
Prince William,91098,130785,Northern,11914251930


In [54]:
# Name the row index so it gets a header when displayed or written out.
counties.index.name = "County"

In [55]:
# Show the frame; the index now carries the "County" header.
counties

Unnamed: 0_level_0,Median Income,Households,Region,Total Household Income
County,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Alexandria,80847,68082,Northern,5504225454
Amherst,44757,12560,Central,562147920
Arlington,94880,98050,Northern,9302984000
Augusta,50612,28516,Central,1443251792
Bedford,54110,27465,Central,1486131150
Botetourt,64724,13126,Southwestern,849567224
Fairfax,105416,391627,Northern,41283751832
Falls Church,114409,5101,Northern,583600309
Prince William,91098,130785,Northern,11914251930


In [56]:
# Write the frame, including its index, to a CSV file in the working directory.
counties.to_csv("counties.csv")

In [57]:
# Read the CSV back, using its first column as the row index.
counties3 = pd.read_csv("counties.csv", index_col=0)

counties3

Unnamed: 0_level_0,Median Income,Households,Region,Total Household Income
County,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1
Alexandria,80847,68082,Northern,5504225454
Amherst,44757,12560,Central,562147920
Arlington,94880,98050,Northern,9302984000
Augusta,50612,28516,Central,1443251792
Bedford,54110,27465,Central,1486131150
Botetourt,64724,13126,Southwestern,849567224
Fairfax,105416,391627,Northern,41283751832
Falls Church,114409,5101,Northern,583600309
Prince William,91098,130785,Northern,11914251930


In [58]:
# Total household income summed per region.
# The aggregation is named by string: passing NumPy callables such as np.sum
# as aggfunc is deprecated in recent pandas and emits a FutureWarning.
table = counties3.pivot_table(values=['Total Household Income'], index=['Region'],
                              aggfunc="sum")

table

Unnamed: 0_level_0,Total Household Income
Region,Unnamed: 1_level_1
Central,3491530862
Northern,68588813525
Southwestern,849567224


In [59]:
# Per-region min, max and mean of total household income.
# Aggregations are named by string: passing the builtins min/max or np.mean
# as callables is deprecated in recent pandas and emits a FutureWarning.
# The resulting column labels ('max', 'mean', 'min') are unchanged.
table = counties3.pivot_table(values=['Total Household Income'], index=['Region'],
                              aggfunc={'Total Household Income': ["min", "max", "mean"]})

table

Unnamed: 0_level_0,Total Household Income,Total Household Income,Total Household Income
Unnamed: 0_level_1,max,mean,min
Region,Unnamed: 1_level_2,Unnamed: 2_level_2,Unnamed: 3_level_2
Central,1486131150,1163844000.0,562147920
Northern,41283751832,13717760000.0,583600309
Southwestern,849567224,849567200.0,849567224


In [61]:
# Load the wine-quality CSV with pandas' inferred dtypes and preview the first rows.
wine_data = pd.read_csv("Portuguese_VinhoVerde_RedWine_Quality.csv")

wine_data.head()

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,11.6,0.58,0.66,2.2,0.074,10.0,47.0,1.0008,3.25,0.57,9.0,3
1,10.4,0.61,0.49,2.1,0.2,5.0,16.0,0.9994,3.16,0.63,8.4,3
2,7.4,1.185,0.0,4.25,0.097,5.0,14.0,0.9966,3.63,0.54,10.7,3
3,10.4,0.44,0.42,1.5,0.145,34.0,48.0,0.99832,3.38,0.86,9.9,3
4,8.3,1.02,0.02,3.4,0.084,6.0,11.0,0.99892,3.48,0.49,11.0,3


In [62]:
# Inferred dtypes: every measurement is float64 and quality is int64.
wine_data.dtypes

fixed acidity           float64
volatile acidity        float64
citric acid             float64
residual sugar          float64
chlorides               float64
free sulfur dioxide     float64
total sulfur dioxide    float64
density                 float64
pH                      float64
sulphates               float64
alcohol                 float64
quality                   int64
dtype: object

In [63]:
# Explicit column dtypes, narrower than the inferred float64/int64:
# every measurement column is read as float32 ...
wine_dtypes = dict.fromkeys(
    [
        "fixed acidity",
        "volatile acidity",
        "citric acid",
        "residual sugar",
        "chlorides",
        "free sulfur dioxide",
        "total sulfur dioxide",
        "density",
        "pH",
        "sulphates",
        "alcohol",
    ],
    np.float32,
)
# ... and the quality score as an 8-bit integer
# (assumes all scores fit in int8 - TODO confirm against the full file).
wine_dtypes["quality"] = np.int8

# Re-read the same file, this time applying the dtypes at parse time.
wine_data = pd.read_csv(
    "Portuguese_VinhoVerde_RedWine_Quality.csv",
    dtype=wine_dtypes,
)

wine_data

Unnamed: 0,fixed acidity,volatile acidity,citric acid,residual sugar,chlorides,free sulfur dioxide,total sulfur dioxide,density,pH,sulphates,alcohol,quality
0,11.6,0.580,0.66,2.20,0.074,10.0,47.0,1.00080,3.25,0.57,9.0,3
1,10.4,0.610,0.49,2.10,0.200,5.0,16.0,0.99940,3.16,0.63,8.4,3
2,7.4,1.185,0.00,4.25,0.097,5.0,14.0,0.99660,3.63,0.54,10.7,3
3,10.4,0.440,0.42,1.50,0.145,34.0,48.0,0.99832,3.38,0.86,9.9,3
4,8.3,1.020,0.02,3.40,0.084,6.0,11.0,0.99892,3.48,0.49,11.0,3
...,...,...,...,...,...,...,...,...,...,...,...,...
1594,8.6,0.420,0.39,1.80,0.068,6.0,12.0,0.99516,3.35,0.69,11.7,8
1595,5.5,0.490,0.03,1.80,0.044,28.0,87.0,0.99080,3.50,0.82,14.0,8
1596,7.2,0.330,0.33,1.70,0.061,3.0,13.0,0.99600,3.23,1.10,10.0,8
1597,7.2,0.380,0.31,2.00,0.056,15.0,29.0,0.99472,3.23,0.76,11.3,8


In [64]:
# Verify the requested dtypes were applied: float32 measurements, int8 quality.
wine_data.dtypes

fixed acidity           float32
volatile acidity        float32
citric acid             float32
residual sugar          float32
chlorides               float32
free sulfur dioxide     float32
total sulfur dioxide    float32
density                 float32
pH                      float32
sulphates               float32
alcohol                 float32
quality                    int8
dtype: object

In [65]:
# No index column was specified on read, so rows get a default RangeIndex.
wine_data.index

RangeIndex(start=0, stop=1599, step=1)

In [67]:
# Column labels taken from the CSV header row.
wine_data.columns

Index(['fixed acidity', 'volatile acidity', 'citric acid', 'residual sugar',
       'chlorides', 'free sulfur dioxide', 'total sulfur dioxide', 'density',
       'pH', 'sulphates', 'alcohol', 'quality'],
      dtype='object')

In [68]:
# Two small frames with different columns and different lengths (2 vs 3 rows),
# used to demonstrate index-based joins.
df1 = pd.DataFrame(dict(A=[0, 0], B=[4, 4]))
df2 = pd.DataFrame(dict(C=[1, 2, 4], D=[3, 3, 8]))

# Render both frames, one after the other.
display(df1, df2)

Unnamed: 0,A,B
0,0,4
1,0,4


Unnamed: 0,C,D
0,1,3
1,2,3
2,4,8


In [72]:
# join() matches rows on the index; `how` decides which row labels are kept.
# df3 = df1.join(df2, how="right") # keep the index of the second data frame
# df3 = df1.join(df2, how="left") # keep the index of the first data frame
# df3 = df1.join(df2, how="inner") # keep only the index labels both data frames share
df3 = df1.join(df2, how="outer") # keep the union of the index labels of both data frames

# Row 2 exists only in df2, so A and B are missing there (and are shown as floats).
df3

Unnamed: 0,A,B,C,D
0,0.0,4.0,1,3
1,0.0,4.0,2,3
2,,,4,8


In [74]:
# Same shapes as the numeric example, but with string values, so a missing
# entry introduced by a join cannot change the column dtype.
df1 = pd.DataFrame(dict(A=["A0", "A0"], B=["B4", "B4"]))
df2 = pd.DataFrame(dict(C=["C1", "C2", "C4"], D=["D3", "D3", "D8"]))

# Render both frames, one after the other.
display(df1, df2)

Unnamed: 0,A,B
0,A0,B4
1,A0,B4


Unnamed: 0,C,D
0,C1,D3
1,C2,D3
2,C4,D8


In [75]:
# Right join: keep every row label of df2; rows df1 lacks get missing A and B.
df3 = df1.join(df2, how="right")

display(df3)

Unnamed: 0,A,B,C,D
0,A0,B4,C1,D3
1,A0,B4,C2,D3
2,,,C4,D8


In [76]:
# All four columns stay object dtype even though A and B contain a missing value.
df3.dtypes

A    object
B    object
C    object
D    object
dtype: object

In [77]:
# Two frames with identical columns and identical default row labels (0, 1),
# used to demonstrate row-wise concatenation.
df1 = pd.DataFrame(dict(A=[0, 0], B=[4, 4]))
df2 = pd.DataFrame(dict(A=[1, 1], B=[3, 3]))

# Render both frames, one after the other.
display(df1, df2)

Unnamed: 0,A,B
0,0,4
1,0,4


Unnamed: 0,A,B
0,1,3
1,1,3


In [79]:
# Stack df2's rows under df1's. Both inputs carry the row labels 0 and 1, so
# ignore_index=True discards them and numbers the result 0..n-1 in one step,
# replacing the separate in-place reset_index(drop=True) call.
df3 = pd.concat([df1, df2], ignore_index=True)

df3

Unnamed: 0,A,B
0,0,4
1,0,4
2,1,3
3,1,3


In [80]:
# Build a frame from a list of rows; `columns` supplies the column labels.
pd.DataFrame([ [1, 2], [4, 5] ], columns=["A", "B"])

Unnamed: 0,A,B
0,1,2
1,4,5
