In [None]:
# Handle missing values in two ways: fill them with column-appropriate
# placeholders, or drop every row that contains at least one missing value.
import pandas as pd

# Sample frame in which each column has one missing entry
data = {'Name': ['Alice', 'Bob', None], 'Age': [25, None, 35], 'City': ['New York', 'San Francisco', None]}
df = pd.DataFrame(data)

# Fill per column rather than with one global string: writing 'Unknown' into
# the numeric Age column would turn it into a mixed-type object column and
# break later arithmetic. Text columns get a text placeholder; Age gets the
# mean of its observed values.
fill_values = {'Name': 'Unknown', 'City': 'Unknown', 'Age': df['Age'].mean()}
df_filled = df.fillna(value=fill_values)
# sep="" stops print from inserting a space that would misalign the first table row
print("Filled Missing Values:\n", df_filled, sep="")

# Keep only the rows that have no missing value in any column
df_dropped = df.dropna()
print("Dropped Missing Values:\n", df_dropped, sep="")


In [None]:
# Feature scaling with Scikit-learn: StandardScaler standardizes each column,
# MinMaxScaler normalizes each column into a fixed range.
from sklearn.preprocessing import MinMaxScaler, StandardScaler

# Three samples, two features on different scales
data = [[10, 200], [15, 400], [20, 800]]
df = pd.DataFrame(data, columns=['Feature1', 'Feature2'])

# Standardize: each column is rescaled to mean 0 and variance 1
scaler = StandardScaler().fit(df)
standardized = scaler.transform(df)
print("Standardized Data:\n", standardized)

# Normalize: each column is rescaled into the [0, 1] interval
scaler = MinMaxScaler().fit(df)
normalized = scaler.transform(df)
print("Normalized Data:\n", normalized)


In [None]:
# Encode a categorical column with one-hot encoding so that each category
# becomes its own 0/1 indicator column.
# Useful for machine learning algorithms that require numeric input.
# Example DataFrame with one categorical column (City) and one numeric column
data = {'City': ['New York', 'San Francisco', 'Los Angeles'], 'Temperature': [21, 18, 25]}
df = pd.DataFrame(data)

# One-hot encode City. dtype=int forces integer 0/1 indicators; without it,
# pandas >= 2.0 returns boolean columns, which is not the numeric format
# this cell promises.
df_encoded = pd.get_dummies(df, columns=['City'], dtype=int)
# sep="" stops print from inserting a space that would misalign the first table row
print("One-hot Encoded Data:\n", df_encoded, sep="")


In [None]:
# Partition the encoded dataset into training and testing subsets with
# Scikit-learn's train_test_split, so a model can later be evaluated on rows
# it never saw during training.
from sklearn.model_selection import train_test_split

# Target is the Temperature column; every remaining column is a feature
y = df_encoded['Temperature']
X = df_encoded.drop(columns='Temperature')

# Request a 20% test share; random_state pins the shuffle for reproducibility
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

print("Training Data Features:\n", X_train)
print("Testing Data Features:\n", X_test)
