# data aggregation continued
* Random sampling with groups

In [1]:
import matplotlib.pyplot as plt
import matplotlib
import numpy as np
import pandas as pd

## reading titanictrain data for summarizing

In [2]:
# Read the Titanic training file into a DataFrame, then display its column labels.
titanicdf1 = pd.read_csv(filepath_or_buffer='titanictrain.csv')
titanicdf1.columns

Index(['PassengerId', 'Survived', 'Pclass', 'Name', 'Sex', 'Age', 'SibSp',
       'Parch', 'Ticket', 'Fare', 'Cabin', 'Embarked'],
      dtype='object')

In [3]:
# Number of rows in the loaded frame.
titanicdf1.shape[0]

891

## Replacing NaN values with the column mode (for both numeric and categorical columns)

In [3]:
# Impute missing values column by column with that column's most frequent value
# (its mode). Numeric and categorical columns are treated the same way; only the
# progress message differs.
for col in titanicdf1.columns:
    column = titanicdf1[col]
    is_numeric = column.values.dtype in [np.int64, np.float64]
    print(col, column.values.dtype, "Numeric" if is_numeric else "Categorical")

    if not column.isnull().any():
        continue

    if is_numeric:
        print("replacing by mode values")
    else:
        print("Null values to be replaced by modal value")

    # mode() ignores NaN and returns ties in sorted order, so iloc[0] is a
    # deterministic choice even when several values are equally frequent.
    modes = column.mode()
    if modes.empty:
        # The column is entirely missing: there is no mode to fill with.
        continue

    # Assign the filled column back instead of calling replace(..., inplace=True)
    # on a column selection: the in-place form acts on a temporary object and does
    # not reliably update the parent frame in recent pandas versions.
    titanicdf1[col] = column.fillna(modes.iloc[0])

PassengerId int64 Numeric
Survived int64 Numeric
Pclass int64 Numeric
Name object Categorical
Sex object Categorical
Age float64 Numeric
replacing by mode values
SibSp int64 Numeric
Parch int64 Numeric
Ticket object Categorical
Fare float64 Numeric
Cabin object Categorical
Null values to be replaced by modal value
Embarked object Categorical
Null values to be replaced by modal value


# Random Sampling using a deck of cards

## Generating a deck of cards (series) for random sampling

In [4]:
# Hearts, Spades, Clubs, Diamonds
# One-letter suit codes: Hearts, Spades, Clubs, Diamonds.
suits = ['H', 'S', 'C', 'D']
# Values for one suit are 1..10 followed by three more 10s; that run of
# thirteen values is repeated four times to cover the whole deck.
card_val = [points
            for _ in range(4)
            for points in [*range(1, 11), 10, 10, 10]]
display(card_val)

[1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 10,
 10,
 10,
 1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 10,
 10,
 10,
 1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 10,
 10,
 10,
 1,
 2,
 3,
 4,
 5,
 6,
 7,
 8,
 9,
 10,
 10,
 10,
 10]

In [5]:
# Number of card values generated above.
len(card_val)

52

## making cards

In [6]:
# Thirteen rank labels for a single suit.
base_names = ['A', *range(2, 11), 'J', 'K', 'Q']
# Card labels are the rank followed by the suit letter, built suit by suit.
cards = [f"{rank}{suit_code}"
         for suit_code in ['H', 'S', 'C', 'D']
         for rank in base_names]

# Card values labelled by card name.
deck = pd.Series(card_val, index=cards)

In [8]:
# Skip the first 13 entries by position and show the rest of the deck.
deck.iloc[13:]

AS      1
2S      2
3S      3
4S      4
5S      5
6S      6
7S      7
8S      8
9S      9
10S    10
JS     10
KS     10
QS     10
AC      1
2C      2
3C      3
4C      4
5C      5
6C      6
7C      7
8C      8
9C      9
10C    10
JC     10
KC     10
QC     10
AD      1
2D      2
3D      3
4D      4
5D      5
6D      6
7D      7
8D      8
9D      9
10D    10
JD     10
KD     10
QD     10
dtype: int64

## Selecting random 5 cards

In [10]:
def draw(deck, n=5, random_state=None):
    """Return ``n`` randomly selected entries of ``deck``.

    Parameters
    ----------
    deck : pandas.Series or pandas.DataFrame
        Object to sample from; its ``sample`` method is used.
    n : int, default 5
        Number of entries to draw.
    random_state : optional
        Forwarded to ``sample``. Pass a seed to get the same draw on every
        run; the default ``None`` keeps the draw non-deterministic.

    Returns
    -------
    Same type as ``deck``, restricted to the sampled entries.

    Raises
    ------
    ValueError
        Raised by pandas when ``n`` is larger than the number of entries,
        because sampling is done without replacement.
    """
    return deck.sample(n=n, random_state=random_state)
# Draw with the default n=5 from the full deck.
draw(deck)

KS    10
KC    10
3H     3
5H     5
4H     4
dtype: int64

## getting 2 random cards from each suit

In [11]:
def get_suit(card):
    """Return the suit of a card label, which is its final character."""
    return card[-1]
# get_suit is applied to each index label; .groups maps each suit to its card labels.
deck.groupby(get_suit).groups

{'C': ['AC', '2C', '3C', '4C', '5C', '6C', '7C', '8C', '9C', '10C', 'JC', 'KC', 'QC'], 'D': ['AD', '2D', '3D', '4D', '5D', '6D', '7D', '8D', '9D', '10D', 'JD', 'KD', 'QD'], 'H': ['AH', '2H', '3H', '4H', '5H', '6H', '7H', '8H', '9H', '10H', 'JH', 'KH', 'QH'], 'S': ['AS', '2S', '3S', '4S', '5S', '6S', '7S', '8S', '9S', '10S', 'JS', 'KS', 'QS']}

In [12]:
# Draw two cards within each suit; the suit becomes an outer index level of the result.
deck.groupby(get_suit).apply(draw, n=2)

C  8C      8
   3C      3
D  10D    10
   8D      8
H  6H      6
   QH     10
S  QS     10
   6S      6
dtype: int64

## When the suit is not needed as a separate index level (`group_keys=False`)

In [13]:
# Same per-suit draw, but group_keys=False leaves the suit out of the result index.
deck.groupby(get_suit, group_keys=False).apply(draw, n=2)

5C      5
10C    10
9D      9
KD     10
2H      2
5H      5
2S      2
AS      1
dtype: int64

# Problem 2: Randomly select three passengers from each port of embarkation (`Embarked`) for a discount (Titanic data set)

In [15]:
# draw also works on a DataFrame: this samples 5 rows (the default n) from the whole table.
draw(titanicdf1)

Unnamed: 0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
86,87,0,3,"Ford, Mr. William Neal",male,16.0,1,3,W./C. 6608,34.375,G6,S
77,78,0,3,"Moutal, Mr. Rahamin Haim",male,24.0,0,0,374746,8.05,G6,S
168,169,0,1,"Baumann, Mr. John D",male,24.0,0,0,PC 17318,25.925,G6,S
284,285,0,1,"Smith, Mr. Richard William",male,24.0,0,0,113056,26.0,A19,S
298,299,1,1,"Saalfeld, Mr. Adolphe",male,24.0,0,0,19988,30.5,C106,S


In [16]:
# Non-null count of every column within each Embarked group.
titanicdf1.groupby('Embarked').count()

Unnamed: 0_level_0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin
Embarked,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1
C,168,168,168,168,168,168,168,168,168,168,168
Q,77,77,77,77,77,77,77,77,77,77,77
S,646,646,646,646,646,646,646,646,646,646,646


In [17]:
# Sample three rows within each Embarked group.
titanicdf1.groupby('Embarked').apply(draw,n=3)

Unnamed: 0_level_0,Unnamed: 1_level_0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
Embarked,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
C,39,40,1,3,"Nicola-Yarred, Miss. Jamila",female,14.0,1,0,2651,11.2417,G6,C
C,599,600,1,1,"Duff Gordon, Sir. Cosmo Edmund (""Mr Morgan"")",male,49.0,1,0,PC 17485,56.9292,A20,C
C,495,496,0,3,"Yousseff, Mr. Gerious",male,24.0,0,0,2627,14.4583,G6,C
Q,245,246,0,1,"Minahan, Dr. William Edward",male,44.0,2,0,19928,90.0,C78,Q
Q,653,654,1,3,"O'Leary, Miss. Hanora ""Norah""",female,24.0,0,0,330919,7.8292,G6,Q
Q,32,33,1,3,"Glynn, Miss. Mary Agatha",female,24.0,0,0,335677,7.75,G6,Q
S,398,399,0,2,"Pain, Dr. Alfred",male,23.0,0,0,244278,10.5,G6,S
S,75,76,0,3,"Moen, Mr. Sigurd Hansen",male,25.0,0,0,348123,7.65,F G73,S
S,199,200,0,2,"Yrois, Miss. Henriette (""Mrs Harbeck"")",female,24.0,0,0,248747,13.0,G6,S


In [20]:
# Sample five rows (draw's default n) within each Sex group.
titanicdf1.groupby('Sex').apply(draw)

Unnamed: 0_level_0,Unnamed: 1_level_0,PassengerId,Survived,Pclass,Name,Sex,Age,SibSp,Parch,Ticket,Fare,Cabin,Embarked
Sex,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1
female,10,11,1,3,"Sandstrom, Miss. Marguerite Rut",female,4.0,1,1,PP 9549,16.7,G6,S
female,229,230,0,3,"Lefebre, Miss. Mathilde",female,24.0,3,1,4133,25.4667,B96 B98,S
female,423,424,0,3,"Danbom, Mrs. Ernst Gilbert (Anna Sigrid Maria ...",female,28.0,1,1,347080,14.4,B96 B98,S
female,501,502,0,3,"Canavan, Miss. Mary",female,21.0,0,0,364846,7.75,B96 B98,Q
female,215,216,1,1,"Newell, Miss. Madeleine",female,31.0,1,0,35273,113.275,D36,C
male,253,254,0,3,"Lobb, Mr. William Arthur",male,30.0,1,0,A/5. 3336,16.1,B96 B98,S
male,647,648,1,1,"Simonius-Blumer, Col. Oberst Alfons",male,56.0,0,0,13213,35.5,A26,C
male,481,482,0,2,"Frost, Mr. Anthony Wood ""Archie""",male,24.0,0,0,239854,0.0,B96 B98,S
male,227,228,0,3,"Lovell, Mr. John Hall (""Henry"")",male,20.5,0,0,A/5 21173,7.25,B96 B98,S
male,101,102,0,3,"Petroff, Mr. Pastcho (""Pentcho"")",male,24.0,0,0,349215,7.8958,B96 B98,S


# Group Weighted Average and Correlation

In [18]:
# A seeded generator makes the sample frame, and every statistic computed from it
# in later cells, identical on each Restart & Run All.
rng = np.random.RandomState(42)
df = pd.DataFrame({'category': ['a', 'a', 'a', 'a',
                                'b', 'b', 'b', 'b'],
                   'data': rng.randn(8),      # standard-normal draws
                   'weights': rng.rand(8)})   # uniform draws in [0, 1)
df

Unnamed: 0,category,data,weights
0,a,-0.078764,0.106988
1,a,0.451919,0.840555
2,a,0.831902,0.201301
3,a,-1.0654,0.757568
4,b,-0.004951,0.502392
5,b,-2.349743,0.047092
6,b,1.305978,0.108507
7,b,0.777124,0.735998


## simple average by np.mean()

In [19]:
# Unweighted mean of the 'data' column.
np.mean(df['data'])

-0.016491983974945876

## `np.average` computes weighted means: first applied to the whole DataFrame

In [20]:
def get_wavg(g):
    """Return the average of ``g['data']`` weighted by ``g['weights']``."""
    values = g['data']
    weights = g['weights']
    return np.average(values, weights=weights)

In [21]:
# Weighted average over the whole frame, ignoring the category column.
get_wavg(df)

0.10068819510618315

# Weighted average over groups

In [22]:
# Apply the weighted average to each category's rows separately.
grouped = df.groupby('category')
grouped.apply(get_wavg)

category
a   -0.140691
b    0.430797
dtype: float64

# Reading Yahoo Finance Data
## `parse_dates=True` asks pandas to parse the index column (here `index_col=0`) as dates, so the resulting frame is indexed by a `DatetimeIndex`.

In [23]:
# Load the closing prices: the first CSV column becomes the index and is parsed as dates.
close_px = pd.read_csv('stock_px_2.csv', index_col=0, parse_dates=True)
close_px.info()
# Last four rows.
close_px.tail(4)

<class 'pandas.core.frame.DataFrame'>
DatetimeIndex: 2214 entries, 2003-01-02 to 2011-10-14
Data columns (total 4 columns):
 #   Column  Non-Null Count  Dtype  
---  ------  --------------  -----  
 0   AAPL    2214 non-null   float64
 1   MSFT    2214 non-null   float64
 2   XOM     2214 non-null   float64
 3   SPX     2214 non-null   float64
dtypes: float64(4)
memory usage: 86.5 KB


Unnamed: 0,AAPL,MSFT,XOM,SPX
2011-10-11,400.29,27.0,76.27,1195.54
2011-10-12,402.19,26.96,77.16,1207.25
2011-10-13,408.43,27.18,76.37,1203.66
2011-10-14,422.0,27.27,78.11,1224.58


In [24]:
def spx_corr(x):
    """Return the correlation of every column of ``x`` with its 'SPX' column."""
    benchmark = x['SPX']
    return x.corrwith(benchmark)

In [29]:
# Correlation of each price column with SPX over the whole period.
spx_corr(close_px)

AAPL    0.244478
MSFT    0.746871
XOM     0.528731
SPX     1.000000
dtype: float64

## Compute the percentage change with respect to the previous element. Useful for time series data

In [31]:
# First rows of the price table, for comparison with the returns computed next.
close_px.head()

Unnamed: 0,AAPL,MSFT,XOM,SPX
2003-01-02,7.4,21.11,29.22,909.03
2003-01-03,7.45,21.14,29.24,908.59
2003-01-06,7.45,21.52,29.96,929.01
2003-01-07,7.43,21.93,28.95,922.93
2003-01-08,7.28,21.31,28.83,909.93


In [30]:
# Change of each column relative to the previous row. dropna() removes rows
# containing NaN (the first row has no previous row to compare against).
rets = close_px.pct_change().dropna()
rets

Unnamed: 0,AAPL,MSFT,XOM,SPX
2003-01-03,0.006757,0.001421,0.000684,-0.000484
2003-01-06,0.000000,0.017975,0.024624,0.022474
2003-01-07,-0.002685,0.019052,-0.033712,-0.006545
2003-01-08,-0.020188,-0.028272,-0.004145,-0.014086
2003-01-09,0.008242,0.029094,0.021159,0.019386
...,...,...,...,...
2011-10-10,0.051406,0.026286,0.036977,0.034125
2011-10-11,0.029526,0.002227,-0.000131,0.000544
2011-10-12,0.004747,-0.001481,0.011669,0.009795
2011-10-13,0.015515,0.008160,-0.010238,-0.002974


## get correlation year-wise

In [32]:
def get_year(x):
    """Return the ``year`` attribute of a date-like value."""
    return x.year
# Group the returns by the year of each index entry, then compute every column's
# correlation with SPX within each year.
by_year = rets.groupby(get_year)
by_year.apply(spx_corr)

Unnamed: 0,AAPL,MSFT,XOM,SPX
2003,0.541124,0.745174,0.661265,1.0
2004,0.374283,0.588531,0.557742,1.0
2005,0.46754,0.562374,0.63101,1.0
2006,0.428267,0.406126,0.518514,1.0
2007,0.508118,0.65877,0.786264,1.0
2008,0.681434,0.804626,0.828303,1.0
2009,0.707103,0.654902,0.797921,1.0
2010,0.710105,0.730118,0.839057,1.0
2011,0.691931,0.800996,0.859975,1.0


In [33]:
# Correlation between the AAPL and MSFT returns within each year.
by_year.apply(lambda g: g['AAPL'].corr(g['MSFT']))

2003    0.480868
2004    0.259024
2005    0.300093
2006    0.161735
2007    0.417738
2008    0.611901
2009    0.432738
2010    0.571946
2011    0.581987
dtype: float64

## Problem to do : Compute pct_change year-wise for the stock data

# Pivot Tables and Cross-Tabulation

In [34]:
# Column dtypes and non-null counts of the Titanic frame.
titanicdf1.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 891 entries, 0 to 890
Data columns (total 12 columns):
 #   Column       Non-Null Count  Dtype  
---  ------       --------------  -----  
 0   PassengerId  891 non-null    int64  
 1   Survived     891 non-null    int64  
 2   Pclass       891 non-null    int64  
 3   Name         891 non-null    object 
 4   Sex          891 non-null    object 
 5   Age          891 non-null    float64
 6   SibSp        891 non-null    int64  
 7   Parch        891 non-null    int64  
 8   Ticket       891 non-null    object 
 9   Fare         891 non-null    float64
 10  Cabin        891 non-null    object 
 11  Embarked     891 non-null    object 
dtypes: float64(2), int64(5), object(5)
memory usage: 83.7+ KB


* pivot_table: aggregates a table of data by one or more keys, arranging the data in a rectangle with some of the group keys along the rows and some along the columns.
* The same result can be obtained with groupby combined with reshape operations that use hierarchical indexing.
* An additional facility is `margins=True`, which adds partial totals (an `All` row/column).
* The default aggregation function is the mean.

## Problem: find average of Fare and age of all passengers for combinations of  ['Sex', 'Embarked']

In [35]:
# Average of every numeric column for each (Sex, Embarked) combination.
# numeric_only=True is required because the frame also holds text columns
# (Name, Ticket, Cabin): pandas 2.0+ raises TypeError on them instead of
# silently leaving them out of the result.
titanicdf1.groupby(['Sex', 'Embarked']).mean(numeric_only=True)

Unnamed: 0_level_0,Unnamed: 1_level_0,PassengerId,Survived,Pclass,Age,SibSp,Parch,Fare
Sex,Embarked,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
female,C,437.315068,0.876712,1.726027,27.630137,0.547945,0.493151,75.169805
female,Q,374.138889,0.75,2.888889,24.097222,0.222222,0.222222,12.634958
female,S,438.780488,0.692683,2.185366,27.67561,0.829268,0.780488,39.143456
male,C,451.536842,0.305263,2.010526,30.536,0.263158,0.263158,48.262109
male,Q,456.317073,0.073171,2.926829,26.707317,0.609756,0.121951,13.838922
male,S,454.507937,0.174603,2.421769,29.25,0.44898,0.240363,21.711996


* Column names are sorted in the output of `pivot_table`

In [36]:
# Mean (the default aggfunc) of each numeric column per (Sex, Embarked).
# The numeric columns are passed explicitly: leaving `values` unset makes
# pivot_table try to average the text columns too, which recent pandas
# versions reject rather than silently dropping them.
titanicdf1.pivot_table(
    values=titanicdf1.select_dtypes(include='number').columns.tolist(),
    index=['Sex', 'Embarked'],
)


Unnamed: 0_level_0,Unnamed: 1_level_0,Age,Fare,Parch,PassengerId,Pclass,SibSp,Survived
Sex,Embarked,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
female,C,27.630137,75.169805,0.493151,437.315068,1.726027,0.547945,0.876712
female,Q,24.097222,12.634958,0.222222,374.138889,2.888889,0.222222,0.75
female,S,27.67561,39.143456,0.780488,438.780488,2.185366,0.829268,0.692683
male,C,30.536,48.262109,0.263158,451.536842,2.010526,0.263158,0.305263
male,Q,26.707317,13.838922,0.121951,456.317073,2.926829,0.609756,0.073171
male,S,29.25,21.711996,0.240363,454.507937,2.421769,0.44898,0.174603


## Find the average of Fare and Age, first overall and then separately for survivors and non-survivors, indexed over ['Sex', 'Embarked']

In [37]:
# Mean Fare and Age for each (Sex, Embarked) combination.
titanicdf1.pivot_table(
    values=['Fare', 'Age'],
    index=['Sex', 'Embarked'],
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Age,Fare
Sex,Embarked,Unnamed: 2_level_1,Unnamed: 3_level_1
female,C,27.630137,75.169805
female,Q,24.097222,12.634958
female,S,27.67561,39.143456
male,C,30.536,48.262109
male,Q,26.707317,13.838922
male,S,29.25,21.711996


In [38]:
# Mean Fare and Age per (Sex, Embarked), with one column per Survived value.
titanicdf1.pivot_table(
    values=['Fare', 'Age'],
    index=['Sex', 'Embarked'],
    columns='Survived',
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Age,Age,Fare,Fare
Unnamed: 0_level_1,Survived,0,1,0,1
Sex,Embarked,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
female,C,25.055556,27.992188,16.215278,83.460286
female,Q,26.277778,23.37037,10.904633,13.211733
female,S,24.587302,29.045775,25.728508,45.095158
male,C,31.325758,28.738621,38.065342,71.468545
male,Q,26.789474,25.666667,13.911732,12.916667
male,S,29.914835,26.107143,19.881281,30.366286


## Use of margins: find the average of Fare and Age separately for survivors and non-survivors, along with the overall average ignoring survival class, indexed over ['Sex', 'Embarked']
* `margins=True` adds an `All` row and an `All` column holding the aggregate over all groups

In [39]:
# Same table as before, plus the 'All' row and column produced by margins=True.
titanicdf1.pivot_table(
    values=['Fare', 'Age'],
    index=['Sex', 'Embarked'],
    columns='Survived',
    margins=True,
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Age,Age,Age,Fare,Fare,Fare
Unnamed: 0_level_1,Survived,0,1,All,0,1,All
Sex,Embarked,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
female,C,25.055556,27.992188,27.630137,16.215278,83.460286,75.169805
female,Q,26.277778,23.37037,24.097222,10.904633,13.211733,12.634958
female,S,24.587302,29.045775,27.67561,25.728508,45.095158,39.143456
male,C,31.325758,28.738621,30.536,38.065342,71.468545,48.262109
male,Q,26.789474,25.666667,26.707317,13.911732,12.916667,13.838922
male,S,29.914835,26.107143,29.25,19.881281,30.366286,21.711996
All,,29.117486,27.683246,28.56697,22.117887,48.395408,32.204208


## using user-specified agg func

In [21]:
# aggfunc=len replaces the default mean: each cell holds the number of rows in that group.
titanicdf1.pivot_table(['Fare', 'Age'], index=['Sex', 'Embarked'],
                 columns='Survived',margins=True,aggfunc=len)

Unnamed: 0_level_0,Unnamed: 1_level_0,Age,Age,Age,Fare,Fare,Fare
Unnamed: 0_level_1,Survived,0,1,All,0,1,All
Sex,Embarked,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
female,C,9.0,64.0,61.0,9.0,64.0,61.0
female,Q,9.0,27.0,12.0,9.0,27.0,12.0
female,S,63.0,140.0,186.0,63.0,140.0,186.0
male,C,66.0,29.0,69.0,66.0,29.0,69.0
male,Q,38.0,3.0,16.0,38.0,3.0,16.0
male,S,364.0,77.0,368.0,364.0,77.0,368.0
All,,424.0,288.0,712.0,424.0,288.0,712.0


In [40]:
# Total Fare and Age per (Sex, Embarked) and Survived value, with 'All' margins.
# The aggregation is named by the string 'sum' rather than the Python builtin:
# recent pandas versions warn when given the builtin because its interpretation
# is scheduled to change, while the string always selects the pandas group sum.
titanicdf1.pivot_table(['Fare', 'Age'], index=['Sex', 'Embarked'],
                 columns='Survived', margins=True, aggfunc='sum')

Unnamed: 0_level_0,Unnamed: 1_level_0,Age,Age,Age,Fare,Fare,Fare
Unnamed: 0_level_1,Survived,0,1,All,0,1,All
Sex,Embarked,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2,Unnamed: 6_level_2,Unnamed: 7_level_2
female,C,225.5,1791.5,2017.0,145.9375,5341.4583,5487.3958
female,Q,236.5,631.0,867.5,98.1417,356.7168,454.8585
female,S,1549.0,4124.5,5673.5,1620.896,6403.5125,8024.4085
male,C,2067.5,833.42,2900.92,2512.3126,2072.5878,4584.9004
male,Q,1018.0,77.0,1095.0,528.6458,38.75,567.3958
male,S,10889.0,2010.25,12899.25,7236.7863,2338.204,9574.9903
All,,15985.5,9467.67,25453.17,12142.7199,16551.2294,28693.9493


In [23]:
# Row counts per (Cabin, Embarked), split by Sex. Combinations with no rows
# show up as NaN in the displayed result.
titanicdf1.pivot_table(['Survived'], index=['Cabin', 'Embarked'],
                 columns='Sex',aggfunc=len,margins=True)

Unnamed: 0_level_0,Unnamed: 1_level_0,Survived,Survived,Survived
Unnamed: 0_level_1,Sex,female,male,All
Cabin,Embarked,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
A10,C,,1.0,1
A14,S,,1.0,1
A16,C,1.0,,1
A19,S,,1.0,1
A20,C,,1.0,1
...,...,...,...,...
F38,Q,,1.0,1
F4,S,1.0,1.0,2
G6,S,4.0,,4
T,S,,1.0,1


## Filling missing cells of the resulting pivot table with 0 (or another value) via `fill_value`

In [37]:
# Same count table as above; fill_value=0 puts 0 in the cells that were NaN.
titanicdf1.pivot_table(['Survived'], index=['Cabin', 'Embarked'],
                 columns='Sex',aggfunc=len,margins=True,fill_value=0)

Unnamed: 0_level_0,Unnamed: 1_level_0,Survived,Survived,Survived
Unnamed: 0_level_1,Sex,female,male,All
Cabin,Embarked,Unnamed: 2_level_2,Unnamed: 3_level_2,Unnamed: 4_level_2
A10,C,0,1,1
A14,S,0,1,1
A16,C,1,0,1
A19,S,0,1,1
A20,C,0,1,1
...,...,...,...,...
F38,Q,0,1,1
F4,S,1,1,2
G6,S,4,0,4
T,S,0,1,1


## crosstab:  a special case of a pivot table to get group frequencies

In [41]:
# Frequency table: rows are Embarked values, columns are Survived values.
pd.crosstab(index=titanicdf1['Embarked'], columns=titanicdf1['Survived'])

Survived,0,1
Embarked,Unnamed: 1_level_1,Unnamed: 2_level_1
C,75,93
Q,47,30
S,427,219


In [24]:
# Same frequency table with 'All' row and column totals added.
pd.crosstab(
    index=titanicdf1['Embarked'],
    columns=titanicdf1['Survived'],
    margins=True,
)

Survived,0,1,All
Embarked,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1
C,75,93,168
Q,47,30,77
S,427,217,644
All,549,340,889
