<a href="https://colab.research.google.com/github/lohith-00/pandas/blob/main/lohith_pandas.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [2]:
import pandas as pd
import numpy as np

# 1. Getting Familiar with Pandas
# A Series is a labelled 1-D array; building it from a dict uses the keys as
# the index, in insertion order.
data_series = pd.Series({'a': 10, 'b': 20, 'c': 30, 'd': 40, 'e': 50})
print(f"Series:\n {data_series}")

# A DataFrame is a table of equally long, named columns.
data_dict = dict(
    Name=['Alice', 'Bob', 'Charlie', 'David'],
    Age=[25, 30, 35, 40],
    Salary=[50000, 60000, 70000, 80000],
)
df = pd.DataFrame(data_dict)
print(f"\nDataFrame:\n {df}")

# 2. Data Handling with Pandas
# Row-oriented input: one dict per record; the dict keys become column labels.
data_list_of_dicts = [
    dict(Name='Eve', Age=45, Salary=90000),
    dict(Name='Frank', Age=50, Salary=100000),
    dict(Name='Grace', Age=29, Salary=55000),
]
df_list_of_dicts = pd.DataFrame(data_list_of_dicts)
print(f"\nDataFrame from list of dictionaries:\n {df_list_of_dicts}")

# Column-oriented input: one list per column, all of the same length.
data_dict_of_lists = dict(
    Name=['Hannah', 'Isaac', 'Judy'],
    Age=[27, 33, 29],
    Salary=[65000, 70000, 60000],
)
df_dict_of_lists = pd.DataFrame(data_dict_of_lists)
print(f"\nDataFrame from dictionary of lists:\n {df_dict_of_lists}")

# Build a frame with gaps. None in an otherwise numeric column is stored as
# NaN, so 'Age' and 'Salary' both come out as float columns.
df_missing = pd.DataFrame({
    'Name': ['Kelly', 'Leo', 'Mia'],
    'Age': [None, 28, 32],
    'Salary': [None, 72000, 65000]
})
print("\nDataFrame with missing data:\n", df_missing)

# Fill each numeric column with its own mean and assign the result back.
# Calling fillna(inplace=True) on df_missing['col'] operates on an intermediate
# selection: recent pandas 2.x releases warn about it, and with Copy-on-Write
# (the default from pandas 3.0) the parent frame is never updated, which would
# leave the NaNs in place and make the integer cast below fail.
df_missing['Age'] = df_missing['Age'].fillna(df_missing['Age'].mean())
df_missing['Salary'] = df_missing['Salary'].fillna(df_missing['Salary'].mean())
print("\nDataFrame after filling missing data:\n", df_missing)

# dropna() returns a new frame without the rows that contain NaN. Every gap was
# filled above, so nothing is removed here and df_cleaned has the same rows.
df_cleaned = df_missing.dropna()
print("\nDataFrame after dropping rows with missing data:\n", df_cleaned)

# With no NaN left the float column can be cast to an integer dtype; the filled
# value is 30.0 here, so no fractional part is lost.
df_missing['Age'] = df_missing['Age'].astype(int)
print("\nDataFrame with 'Age' as integer:\n", df_missing)

# 3. Data Analysis with Pandas
# describe() summarises the numeric columns of df (count, mean, std, quartiles).
print(f"\nSummary Statistics:\n {df.describe()}")

# Outer join on 'Name' keeps every row from both frames; the clashing 'Age' and
# 'Salary' columns are told apart by the _df1 / _df2 suffixes.
df_merged = df.merge(
    df_list_of_dicts,
    how='outer',
    on='Name',
    suffixes=('_df1', '_df2'),
)
print(f"\nMerged DataFrame:\n {df_merged}")

Series:
 a    10
b    20
c    30
d    40
e    50
dtype: int64

DataFrame:
       Name  Age  Salary
0    Alice   25   50000
1      Bob   30   60000
2  Charlie   35   70000
3    David   40   80000

DataFrame from list of dictionaries:
     Name  Age  Salary
0    Eve   45   90000
1  Frank   50  100000
2  Grace   29   55000

DataFrame from dictionary of lists:
      Name  Age  Salary
0  Hannah   27   65000
1   Isaac   33   70000
2    Judy   29   60000

DataFrame with missing data:
     Name   Age   Salary
0  Kelly   NaN      NaN
1    Leo  28.0  72000.0
2    Mia  32.0  65000.0

DataFrame after filling missing data:
     Name   Age   Salary
0  Kelly  30.0  68500.0
1    Leo  28.0  72000.0
2    Mia  32.0  65000.0

DataFrame after dropping rows with missing data:
     Name   Age   Salary
0  Kelly  30.0  68500.0
1    Leo  28.0  72000.0
2    Mia  32.0  65000.0

DataFrame with 'Age' as integer:
     Name  Age   Salary
0  Kelly   30  68500.0
1    Leo   28  72000.0
2    Mia   32  65000.0

Summary St