In [None]:
from google.colab import drive

# Mount Google Drive
drive.mount('/content/drive')

# Navigate to the desired directory
import os
os.chdir('/content/drive/My Drive/')

# Verify the current working directory
!pwd

In [None]:
# Import necessary libraries
import pandas as pd
import seaborn as sns

In [None]:
# Read the CSV file into a DataFrame named `df`
# (the path is relative to the current working directory)
df = pd.read_csv(filepath_or_buffer='your_file.csv')

In [None]:
# Print a concise summary of the DataFrame: its columns, their non-null counts and dtypes.
# `info()` writes the summary to the output itself, so no `print` or `display` is needed.
df.info()

In [None]:
# Summary statistics for a single column:
# select every row of 'your_column', then describe the resulting Series
df.loc[:, 'your_column'].describe()

In [None]:
# Count the missing (null / NaN) values in each column
df.isna().sum()

In [None]:
# Remove every row that has at least one NaN value
# Why drop here:
# Rows with missing values may be incomplete records that would skew the analysis.
# Check how many rows this removes before relying on the result: if a large share of the
# data is missing, filling in the missing values may be a better choice than dropping.
df = df.dropna(axis=0, how='any')

In [None]:

# Rename a column
# Pass an {old_name: new_name} mapping and state that it applies to the column axis
df.rename({'old_column_name': 'new_column_name'}, axis='columns', inplace=True)

# About `inplace`:
# - inplace=True  : the DataFrame that `df` refers to is modified directly and the call returns None,
#                   so do not assign the result back (that would replace `df` with None).
# - inplace=False : (the default) the call returns a new DataFrame with the change applied and leaves
#                   `df` as it was; assign the result to a name to keep it.
# Modifying in place is sometimes chosen to avoid holding an extra DataFrame;
# whether that actually saves memory depends on the pandas version — TODO confirm before relying on it.


In [None]:
# Convert a date column from string to datetime.
# The source column is assumed to be named 'date_column' and to hold date strings;
# the parsed values are stored in the 'datetime' column and 'date_column' itself is left untouched.
# `format` takes strftime-style directives, not placeholder text such as 'YYYY-MM-DD'.
# Directive strings for some common layouts:
#   YYYY-MM-DD           -> '%Y-%m-%d'
#   DD/MM/YYYY           -> '%d/%m/%Y'
#   MM-DD-YYYY           -> '%m-%d-%Y'
#   YYYY/MM/DD HH:MM:SS  -> '%Y/%m/%d %H:%M:%S'
df['datetime'] = pd.to_datetime(df['date_column'], format='%Y-%m-%d')

# Alternative when some values may not parse: `errors='coerce'` turns unparseable values into NaT
# instead of raising. No `format` is given here, so pandas infers the layout itself —
# check how many NaT values result before relying on the output.
# df['datetime'] = pd.to_datetime(df['date_column'], errors='coerce')


In [None]:
# Print the column's dtype before conversion, for comparison with the second print below
print(df['number_column'].dtype)

# Convert a column containing numbers in string format to a numerical type.
# `errors='coerce'` replaces values that cannot be parsed with NaN instead of raising,
# so re-check the null count afterwards to see how many values failed to convert.
# The column is overwritten, so the original strings are no longer available in `df`.
df['number_column'] = pd.to_numeric(df['number_column'], errors='coerce')

# Print the dtype again to confirm the conversion
print(df['number_column'].dtype)