In [1]:
# Importing Pandas
import pandas as pd

In [2]:
# Creating DataFrames
# Each list below holds the values of one column, in row order.
names = ['Alice', 'Bob', 'Charlie']
ages = [25, 30, 35]
cities = ['New York', 'Los Angeles', 'Chicago']

# Column label -> column values; the key order fixes the column order.
data = {'Name': names, 'Age': ages, 'City': cities}

df = pd.DataFrame(data)
print(df)


      Name  Age         City
0    Alice   25     New York
1      Bob   30  Los Angeles
2  Charlie   35      Chicago


In [3]:
# Viewing Data
# Display the first few rows
print(df.head())

# Display the last few rows
print(df.tail())

# Display information about the DataFrame.
# info() writes its report to stdout on its own and returns None, so it is
# called directly: wrapping it in print() would add a stray "None" line.
df.info()

# Display basic statistics
print(df.describe())


      Name  Age         City
0    Alice   25     New York
1      Bob   30  Los Angeles
2  Charlie   35      Chicago
      Name  Age         City
0    Alice   25     New York
1      Bob   30  Los Angeles
2  Charlie   35      Chicago
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 3 entries, 0 to 2
Data columns (total 3 columns):
 #   Column  Non-Null Count  Dtype 
---  ------  --------------  ----- 
 0   Name    3 non-null      object
 1   Age     3 non-null      int64 
 2   City    3 non-null      object
dtypes: int64(1), object(2)
memory usage: 204.0+ bytes
None
        Age
count   3.0
mean   30.0
std     5.0
min    25.0
25%    27.5
50%    30.0
75%    32.5
max    35.0


In [4]:
# Selecting Data
# A single label inside [] picks out one column (returned as a Series)
name_column = df['Name']
print(name_column)

# A list of labels inside [] picks out several columns (returned as a DataFrame)
wanted_columns = ['Name', 'Age']
print(df[wanted_columns])


0      Alice
1        Bob
2    Charlie
Name: Name, dtype: object
      Name  Age
0    Alice   25
1      Bob   30
2  Charlie   35


In [5]:
# Selecting Rows
# Position-based selection with iloc
first_row = df.iloc[0]        # the row at position 0
first_two_rows = df.iloc[:2]  # positions 0 and 1; the slice end is excluded
print(first_row)
print(first_two_rows)

# Label-based selection with loc
row_labelled_zero = df.loc[0]  # same as the first row while the default integer index is in place
print(row_labelled_zero)


Name       Alice
Age           25
City    New York
Name: 0, dtype: object
    Name  Age         City
0  Alice   25     New York
1    Bob   30  Los Angeles
Name       Alice
Age           25
City    New York
Name: 0, dtype: object


In [6]:
# Conditional Selection
# Build a boolean mask that is True where Age exceeds 30,
# then keep only the rows for which the mask is True.
older_than_30 = df['Age'] > 30
print(df[older_than_30])


      Name  Age     City
2  Charlie   35  Chicago


In [7]:
# Adding Columns
# Assigning a list to a label that does not exist yet creates that column;
# the list supplies one value per row.
country_values = ['USA', 'USA', 'USA']
df['Country'] = country_values
print(df)


      Name  Age         City Country
0    Alice   25     New York     USA
1      Bob   30  Los Angeles     USA
2  Charlie   35      Chicago     USA


In [8]:
# Removing Columns
# drop() returns a new frame without the named column, so the result is
# bound back to df to keep it.
df = df.drop(columns='Country')
print(df)


      Name  Age         City
0    Alice   25     New York
1      Bob   30  Los Angeles
2  Charlie   35      Chicago


In [9]:
# Adding Rows
# The new record is wrapped in a one-row DataFrame so it can be concatenated
new_row = pd.DataFrame([{'Name': 'David', 'Age': 40, 'City': 'San Francisco'}])

# Stack the new row under the existing ones; ignore_index=True renumbers
# the combined rows 0..n-1 instead of keeping each frame's own labels.
frames = [df, new_row]
df = pd.concat(frames, ignore_index=True)
print(df)


      Name  Age           City
0    Alice   25       New York
1      Bob   30    Los Angeles
2  Charlie   35        Chicago
3    David   40  San Francisco


In [10]:
# Removing Rows
# drop() matches on the index label, so this removes the row labelled 0
# (the first row here). The remaining rows keep their original labels.
df = df.drop(index=0)
print(df)


      Name  Age           City
1      Bob   30    Los Angeles
2  Charlie   35        Chicago
3    David   40  San Francisco


In [11]:
# Handling Missing Data
# Sample data with gaps: Bob has no Age and Charlie has no City
names = ['Alice', 'Bob', 'Charlie']
ages = [25, None, 35]
cities = ['New York', 'Los Angeles', None]
data = {'Name': names, 'Age': ages, 'City': cities}
df = pd.DataFrame(data)

# Filling missing values: one replacement per column, keyed by column label
fill_values = {'Age': 30, 'City': 'Unknown'}
df_filled = df.fillna(value=fill_values)
print(df_filled)

# Dropping rows with missing values: only rows with no gap in any column remain
df_dropped = df.dropna()
print(df_dropped)


      Name   Age         City
0    Alice  25.0     New York
1      Bob  30.0  Los Angeles
2  Charlie  35.0      Unknown
    Name   Age      City
0  Alice  25.0  New York


In [12]:
# Grouping and Aggregating
# Mean of every numeric column per 'City'.
# Rows whose grouping key is missing do not appear in the result.
city_groups = df.groupby('City')
grouped_df = city_groups.mean(numeric_only=True)
print(grouped_df)

# Several statistics at once, grouped by the ('City', 'Age') pair:
# mean/min/max of 'Age' and the number of non-missing 'Name' entries.
stats_per_column = {
    'Age': ['mean', 'min', 'max'],
    'Name': 'count'
}
city_age_groups = df.groupby(['City', 'Age'])
# reset_index() turns the group keys back into ordinary columns
agg_df = city_age_groups.agg(stats_per_column).reset_index()

print(agg_df)


              Age
City             
Los Angeles   NaN
New York     25.0
       City   Age                    Name
                   mean   min   max count
0  New York  25.0  25.0  25.0  25.0     1


In [13]:
# Merging and Joining DataFrames
# Two frames that share an 'ID' column but only partly overlap in its values
df1 = pd.DataFrame({'ID': [1, 2, 3], 'Name': ['Alice', 'Bob', 'Charlie']})
df2 = pd.DataFrame({'ID': [1, 2, 4], 'Age': [25, 30, 40]})

# Inner merge on 'ID': only IDs present in both frames (1 and 2) survive
merged_df = df1.merge(df2, on='ID', how='inner')
print(merged_df)

# Row-wise concatenation: df2's rows are stacked under df1's.
# Columns missing from one frame are filled with NaN, and each frame keeps
# its own index labels, so the labels 0..2 appear twice in the result.
stacked_frames = [df1, df2]
concatenated_df = pd.concat(stacked_frames)
print(concatenated_df)


   ID   Name  Age
0   1  Alice   25
1   2    Bob   30
   ID     Name   Age
0   1    Alice   NaN
1   2      Bob   NaN
2   3  Charlie   NaN
0   1      NaN  25.0
1   2      NaN  30.0
2   4      NaN  40.0


In [14]:
# Time Series Data
# Six consecutive dates starting on 2021-01-01
dates = pd.date_range(start='20210101', periods=6)

# One value per date; the 'Date' column is then promoted to the index
values = [1, 2, 3, 4, 5, 6]
df_ts = pd.DataFrame({'Date': dates, 'Value': values}).set_index('Date')
print(df_ts)

# Resampling time series data into daily ('D') bins and summing each bin.
# The data already has exactly one row per day, so every bin holds a single
# value and the result matches the input.
daily_bins = df_ts.resample('D')
resampled_df = daily_bins.sum()
print(resampled_df)


            Value
Date             
2021-01-01      1
2021-01-02      2
2021-01-03      3
2021-01-04      4
2021-01-05      5
2021-01-06      6
            Value
Date             
2021-01-01      1
2021-01-02      2
2021-01-03      3
2021-01-04      4
2021-01-05      5
2021-01-06      6


In [15]:
# CSV
# Round-trip the current DataFrame through a CSV file in the working directory
csv_path = 'data.csv'

# Writing to a CSV file; index=False leaves the row labels out of the file
df.to_csv(csv_path, index=False)

# Reading from a CSV file; df now refers to the frame parsed back from disk
df = pd.read_csv(csv_path)

In [16]:
# Excel
# Round-trip the current DataFrame through an Excel workbook in the working directory
# NOTE(review): presumably requires an Excel engine such as openpyxl to be
# installed alongside pandas — confirm for the target environment.
excel_path = 'data.xlsx'

# Writing to an Excel file; index=False leaves the row labels out of the sheet
df.to_excel(excel_path, index=False)

# Reading from an Excel file; df now refers to the frame parsed back from disk
df = pd.read_excel(excel_path)