##DataFrame Creation and Manipulation

In [3]:
import pandas as pd

# Create a Dataframe from a dictionary
data = {'Name': ['Alice','Bob','Charlie'],
        'Age':[25,30,35],
        'City':['New York','Los Angeles','Chicago']}
df = pd.DataFrame(data)
print(df)

#Add new column
df['Salary']=[70000,80000,90000]
print(df)

#drop a column
df = df.drop('Salary',axis=1)
print(df)

#Rename a column
df = df.rename(columns={'Name':'Full Name'})
print(df)

#Sort a column
df = df.sort_values(by='Age',ascending=False)
print(df)

      Name  Age         City
0    Alice   25     New York
1      Bob   30  Los Angeles
2  Charlie   35      Chicago
      Name  Age         City  Salary
0    Alice   25     New York   70000
1      Bob   30  Los Angeles   80000
2  Charlie   35      Chicago   90000
      Name  Age         City
0    Alice   25     New York
1      Bob   30  Los Angeles
2  Charlie   35      Chicago
  Full Name  Age         City
0     Alice   25     New York
1       Bob   30  Los Angeles
2   Charlie   35      Chicago
  Full Name  Age         City
2   Charlie   35      Chicago
1       Bob   30  Los Angeles
0     Alice   25     New York


##Data Selection and Filtering

In [4]:
# Select specific rows and columns using .loc and .iloc
# .loc is label based
# .iloc is integer based
Selected_data_loc = df.loc[0:1,['Full Name','Age']]
print(Selected_data_loc)

Selected_data_iloc = df.iloc[0:2,0:2]
print(Selected_data_iloc)

# Filter rows based on a condition
filtered_data = df[df['Age']>25]
print(filtered_data)

# Filter rows based on multiple conditions
filtered_data = df[(df['Age']>25) & (df['City']=='New York')]
print(filtered_data)


Empty DataFrame
Columns: [Full Name, Age]
Index: []
  Full Name  Age
2   Charlie   35
1       Bob   30
  Full Name  Age         City
2   Charlie   35      Chicago
1       Bob   30  Los Angeles
Empty DataFrame
Columns: [Full Name, Age, City]
Index: []


##Data Aggregation and Grouping

In [5]:
# Group data by a column and compute aggregate statistics (mean, sum, count)
df = pd.DataFrame({'City': ['New York', 'Los Angeles', 'Chicago', 'New York', 'Chicago'],
                   'Age': [25, 30, 35, 40, 50]})
grouped_data = df.groupby('City').agg({'Age':['mean','sum','count']})
print(grouped_data)

              Age          
             mean sum count
City                       
Chicago      42.5  85     2
Los Angeles  30.0  30     1
New York     32.5  65     2


In [8]:
# Pivot a df
pivot_df = df.pivot_table(index='City',values='Age',aggfunc='mean')
print(pivot_df)

              Age
City             
Chicago      42.5
Los Angeles  30.0
New York     32.5


##Merging and Joining DataFrames

In [9]:
# Merge two DataFrames on a common column
df1 = pd.DataFrame({'ID': [1, 2, 3],
                    'Name': ['Alice', 'Bob', 'Charlie']})
df2 = pd.DataFrame({'ID': [1, 2, 4],
                    'Age': [25, 30, 40]})
merged_df = pd.merge(df1, df2, on='ID')
print(merged_df)

# Join two DataFrames based on indexes
inner_join = pd.merge(df1,df2, on ='ID', how='inner')
outer_join = pd.merge(df1, df2, on='ID', how='outer')
left_join = pd.merge(df1, df2, on='ID', how='left')
right_join = pd.merge(df1, df2, on='ID', how='right')
print(inner_join, outer_join, left_join, right_join, sep='\n')

   ID   Name  Age
0   1  Alice   25
1   2    Bob   30
   ID   Name  Age
0   1  Alice   25
1   2    Bob   30
   ID     Name   Age
0   1    Alice  25.0
1   2      Bob  30.0
2   3  Charlie   NaN
3   4      NaN  40.0
   ID     Name   Age
0   1    Alice  25.0
1   2      Bob  30.0
2   3  Charlie   NaN
   ID   Name  Age
0   1  Alice   25
1   2    Bob   30
2   4    NaN   40
