In [65]:
import pandas as pd

In [66]:
# Two small frames that share an 'ID' key column. IDs 1 and 2 occur in both,
# ID 3 only in df1 and ID 4 only in df2.
df1 = pd.DataFrame(
    data=dict(
        ID=[1, 2, 3],
        Name=['Alice', 'Bob', 'Charlie'],
    )
)
df2 = pd.DataFrame(
    data=dict(
        ID=[1, 2, 4],
        Age=[25, 30, 35],
    )
)

# Print both frames, one after the other.
print(df1, df2, sep="\n")

   ID     Name
0   1    Alice
1   2      Bob
2   3  Charlie
   ID  Age
0   1   25
1   2   30
2   4   35


In [67]:
# Inner join on 'ID': only keys present in both df1 and df2 are kept.
inner_join = pd.merge(
    left=df1,
    right=df2,
    how='inner',
    on='ID',
)
print(inner_join)

   ID   Name  Age
0   1  Alice   25
1   2    Bob   30


In [68]:
# Outer join on 'ID': keys from either frame are kept; cells that have no
# match on the other side come back as NaN.
outer_join = pd.merge(
    left=df1,
    right=df2,
    how='outer',
    on='ID',
)
print(outer_join)

   ID     Name   Age
0   1    Alice  25.0
1   2      Bob  30.0
2   3  Charlie   NaN
3   4      NaN  35.0


In [69]:
# Fill the two gaps left by the outer join.
# Rows are selected by their 'ID' key instead of by hard-coded index labels,
# so the intended cells are updated even if the merged rows come back in a
# different order or with a different index.
outer_join.loc[outer_join["ID"] == 4, "Name"] = "Dau"  # ID 4 has no Name (absent from df1)
outer_join.loc[outer_join["ID"] == 3, "Age"] = 23      # ID 3 has no Age (absent from df2)
print(outer_join)

   ID     Name   Age
0   1    Alice  25.0
1   2      Bob  30.0
2   3  Charlie  23.0
3   4      Dau  35.0


In [70]:
# Average the remaining columns per 'Name'. Every name occurs exactly once in
# outer_join, so each group mean is simply that row's own value.
groupby_df = (
    outer_join
    .groupby(by='Name')
    .mean()
)
print(groupby_df)


          ID   Age
Name              
Alice    1.0  25.0
Bob      2.0  30.0
Charlie  3.0  23.0
Dau      4.0  35.0


In [71]:
# Five rows for two people; only 'Score' varies between a person's rows.
data = dict(
    Name=['Alice', 'Bob', 'Alice', 'Bob', 'Alice'],
    Score=[85, 90, 78, 92, 88],
    Age=[20, 22, 20, 22, 20],
    Height=[165, 180, 165, 180, 165],
    Gender=['F', 'M', 'F', 'M', 'F'],
)

# Build the frame from the column -> values mapping and show it.
df = pd.DataFrame(data=data)
print("Original DataFrame:\n", df)

Original DataFrame:
     Name  Score  Age  Height Gender
0  Alice     85   20     165      F
1    Bob     90   22     180      M
2  Alice     78   20     165      F
3    Bob     92   22     180      M
4  Alice     88   20     165      F


In [72]:
# Remove the non-numeric 'Gender' column. `columns=` targets column labels
# (the same as `axis=1`); use `index=` / `axis=0` to drop rows instead.
# `drop` returns a new frame, so `df` itself still has the column.
df_update = df.drop(columns='Gender')
print(df_update)

    Name  Score  Age  Height
0  Alice     85   20     165
1    Bob     90   22     180
2  Alice     78   20     165
3    Bob     92   22     180
4  Alice     88   20     165


In [73]:
# Group the numeric-only frame by 'Name' and take the mean of each remaining
# column within every group.
grouped = df_update.groupby(by='Name')
groupby_df = grouped.mean()
print(groupby_df)

           Score   Age  Height
Name                          
Alice  83.666667  20.0   165.0
Bob    91.000000  22.0   180.0


In [74]:
# Instead of dropping the non-numeric column, keep it by giving every column
# its own aggregation.
grouped = df.groupby('Name')

# The numeric columns are averaged. A mean is not defined for the text column
# 'Gender', so it gets a different reducer (min, max, count, first, last, ...
# would all be valid choices); 'max' is used here.
agg_df = grouped.agg({
    **dict.fromkeys(['Score', 'Age', 'Height'], 'mean'),
    'Gender': 'max',
})

print(agg_df)

           Score   Age  Height Gender
Name                                 
Alice  83.666667  20.0   165.0      F
Bob    91.000000  22.0   180.0      M


In [75]:
def _grade_for(score):
    """Return the letter grade for a score: above 90 -> 'A', above 80 -> 'B', otherwise 'C'."""
    if score > 90:
        return 'A'
    if score > 80:
        return 'B'
    return 'C'


# Derive a 'Grade' column from 'Score', one value at a time.
df['Grade'] = df['Score'].apply(_grade_for)
print(df)

    Name  Score  Age  Height Gender Grade
0  Alice     85   20     165      F     B
1    Bob     90   22     180      M     B
2  Alice     78   20     165      F     C
3    Bob     92   22     180      M     A
4  Alice     88   20     165      F     B


In [76]:
# Bind `view` to a full-range slice of `df` (all rows, same columns).
# NOTE(review): whether this slice shares memory with `df` or behaves as an
# independent copy depends on the pandas version / Copy-on-Write setting —
# confirm for the pandas version this notebook targets.
view = df[:]
# Bare last expression: rendered as the cell's output.
view

Unnamed: 0,Name,Score,Age,Height,Gender,Grade
0,Alice,85,20,165,F,B
1,Bob,90,22,180,M,B
2,Alice,78,20,165,F,C
3,Bob,92,22,180,M,A
4,Alice,88,20,165,F,B


In [77]:
# Write 95 into row position 0, column position 3 (the 'Height' column) of `view`.
view.iloc[0, 3] = 95
# Display `df`, not `view`, to check whether the write reached the original frame.
df

# `view` is a reference to the original `df`: in the recorded output the write
# made through `view` also shows up in `df` (row 0 now has Height 95).
# NOTE(review): with pandas Copy-on-Write enabled the slice would presumably
# behave like a copy and `df` would stay unchanged — confirm for the pandas
# version in use.

Unnamed: 0,Name,Score,Age,Height,Gender,Grade
0,Alice,85,20,95,F,B
1,Bob,90,22,180,M,B
2,Alice,78,20,165,F,C
3,Bob,92,22,180,M,A
4,Alice,88,20,165,F,B


In [78]:
# `DataFrame.copy` gives `copy` its own data (deep copy, the default), so the
# write below (row position 1, column position 1 = 'Score') changes only
# `copy` and leaves `df` untouched.
copy = df.copy(deep=True)
copy.iloc[1, 1] = 86
print(copy)
# Last expression: display the original `df` to confirm row 1 still has Score 90.
df

    Name  Score  Age  Height Gender Grade
0  Alice     85   20      95      F     B
1    Bob     86   22     180      M     B
2  Alice     78   20     165      F     C
3    Bob     92   22     180      M     A
4  Alice     88   20     165      F     B


Unnamed: 0,Name,Score,Age,Height,Gender,Grade
0,Alice,85,20,95,F,B
1,Bob,90,22,180,M,B
2,Alice,78,20,165,F,C
3,Bob,92,22,180,M,A
4,Alice,88,20,165,F,B
