In [None]:
import pandas as pd

In [None]:
# List the contents of the datasets directory (shell escape; needs a Unix-like `ls`).
!ls ../../datasets

In [None]:
# Relative path to the Titanic CSV. Kept in a variable because the same file is
# read again later with an explicit index column.
local_source = '../../datasets/titanic.csv'
df = pd.read_csv(local_source)

In [None]:
# Display the loaded DataFrame.
df

# 6.1 Splitting data into other DataFrames based on column values (keeping the same headers) - Horizontal Split

In [None]:
df_male = df[df['Sex'] == 'male']

In [None]:
df_male

In [None]:
df_female = df[df['Sex'] == 'female']

In [None]:
df_female

# 6.2 Splitting data into other DataFrames by selecting a few columns - Vertical Split

In [None]:
df_split_4_columns = df[['Name', 'Sex','Age', 'Ticket']]

In [None]:
df_split_4_columns

In [None]:
df_split_8_columns = df[['Name', 'Survived', 'Pclass','SibSp', 'Parch', 'Fare', 'Cabin', 'Embarked']]

In [None]:
df_split_8_columns

# 6.3 Pandas Concat - Combine Rows with the same header

pandas.concat(objs, axis=0, join='outer', ignore_index=False, keys=None,
              levels=None, names=None, verify_integrity=False, sort=False, copy=True)

- objs is the list of DataFrame objects ([df1, df2, ...]) to be concatenated
- axis defines the direction of the concatenation, 0 for row-wise and 1 for column-wise
- join can either be inner (intersection) or outer (union)

### We already have the df_male and df_female DataFrames, which share exactly the same header

In [None]:
df_mix_sex = pd.concat([df_female, df_male])

In [None]:
df_mix_sex

In [None]:
df_mix_sex = pd.concat([df_female, df_male]).reset_index(drop = True)

In [None]:
df_mix_sex

In [None]:
df_male.shape

In [None]:
df_female.shape

In [None]:
pd.concat([df_male, df_female],ignore_index=True)

In [None]:
pd.concat([df_male, df_female])

In [None]:
# concat's `sort` argument is a boolean that controls ordering of the
# non-concatenation axis; it is not a sort key. To order the combined rows by
# Ticket, sort the concatenated result explicitly.
pd.concat([df_male, df_female]).sort_values('Ticket')

### Note: The pandas concat function works both vertically (row-wise) and horizontally (column-wise).
- To join two DataFrames together column-wise, change the axis value from the default 0 to 1
- axis : 0 (Row-wise concat)
- axis : 1 (Column-wise concat)

# 6.4 Pandas Concat Option - Merge Columns

In [None]:
df_concat_left = df[['Name', 'Sex','Age', 'Ticket']]

In [None]:
df_concat_right = df[['Name', 'Survived', 'Pclass','SibSp', 'Parch', 'Fare', 'Cabin', 'Embarked']]

In [None]:
pd.concat([df_concat_left, df_concat_right], axis= 1 )

# 6.5 Pandas merge Option

pd.merge(left, right, how='inner', on=None, left_on=None, right_on=None,
         left_index=False, right_index=False, sort=True,
         suffixes=('_x', '_y'), copy=True, indicator=False,
         validate=None)

## Merging with Non Indexed DataFrame 

In [None]:
df

In [None]:
df_a = df[['Name', 'Sex','Age', 'Ticket']]

In [None]:
df_a 

In [None]:
df_b = df[['Name', 'Survived', 'Pclass','SibSp', 'Parch', 'Fare', 'Cabin', 'Embarked']]

In [None]:
df_b

In [None]:
df_merged_non_index = pd.merge(df_a, df_b)

In [None]:
df_merged_non_index

## Merging with Indexed DataFrame 

In [None]:
df_indexed = pd.read_csv(local_source, index_col= 'PassengerId')

In [None]:
df_indexed

In [None]:
df_part_1 = df_indexed[['Survived', 'Pclass','SibSp', 'Parch', 'Fare', 'Cabin', 'Embarked']]

In [None]:
df_part_1

In [None]:
df_part_2 = df_indexed[['Name', 'Sex','Age', 'Ticket']]

In [None]:
df_part_2

In [None]:
df_merged = pd.merge(df_part_1, df_part_2, left_index=True, right_index=True)

In [None]:
df_merged

In [None]:
df_merged = pd.merge(df_female, df_male)

In [None]:
df_merged

# 6.6 Pandas Join method to combine two DataFrames

DataFrame.join(other, on=None, how='left', lsuffix='', rsuffix='', sort=False)

In [None]:
df_joined_left = df_part_1.join(df_part_2)

In [None]:
df_joined_left

In [None]:
df_joined_right = df_part_1.join(df_part_2, how = "right")

In [None]:
df_joined_right

# 6.7 Pandas Append method to combine two DataFrames

`DataFrame.append` combines two DataFrames along the row axis and is useful when both DataFrames share the same headers. Note: `DataFrame.append` was deprecated in pandas 1.4 and removed in pandas 2.0; `pd.concat` is the supported equivalent.

In [None]:
df_male

In [None]:
df_female

In [None]:
df_male_female_appended = df_male.append(df_female)

In [None]:
df_male_female_appended

In [None]:
df_male

In [None]:
df_male_local = df_male

In [None]:
df_male_local.append(df_female)

In [None]:
df_male_local

### Note:
- When append() is applied, it does not match DataFrames on any keys; it simply stacks the rows.
- The result does not update the original DataFrame; instead, a new DataFrame is returned.
- If the DataFrames' columns do not match, all unmatched cells are filled with NaN.

# 6.8 Pandas Update method to combine two DataFrames

### Note:
- The update function modifies, in place, the DataFrame on which it is called
- It does not add new rows; it only updates the data in the matched rows
- If there is no matching data in the second DataFrame, the first DataFrame will not change

In [None]:
df_male

In [None]:
df_female

In [None]:
df_male_local = df_male

In [None]:
df_male_local.update(df_female, )

In [None]:
df_male_local

In [None]:
df_male_local.iloc[101,0:]

In [None]:
df_male_local.iloc[10]

In [None]:
df_male_local.iloc[[10]]

In [None]:
df_row_selected = df_male_local.iloc[[10]]

In [None]:
df_row_selected

In [None]:
df_row_selected.loc[df_row_selected['PassengerId'] == 22,'Survived']

In [None]:
df_row_selected.loc[df_row_selected['PassengerId'] == 22,'Survived'] = 5

In [None]:
df_row_selected

In [None]:
df_male_local.update(df_row_selected)

In [None]:
df_male_local.iloc[10]

# Bonus Tip: Reading Clipboard

In [None]:
# Build a DataFrame from whatever text is currently on the system clipboard.
# NOTE(review): the result depends on what was last copied, so this cell is not
# reproducible under Restart & Run All.
dfc = pd.read_clipboard()

In [None]:
dfc