# 1. Python for Data Science
Useful Libraries and Commands

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

# Load a dataset
# NOTE(review): 'your_dataset.csv' and 'column_name' look like placeholders —
# replace them with a real path / column before running.
df = pd.read_csv('your_dataset.csv')

# Explore the dataset
# A notebook cell renders only its final expression, so every summary is
# passed to display() explicitly (display is provided by the IPython kernel);
# otherwise all but the last result would be computed and thrown away.
display(df.head())
display(df.describe())
df.info()  # writes its report to stdout itself
display(df.isnull().sum())
display(df['column_name'].value_counts())

# 2. Exploratory Data Analysis

In [None]:
# Histograms and boxplots
# Each chart gets its own figure/axes. Without an explicit `ax`, pandas and
# seaborn all draw on the current axes, so the three plots would be painted
# on top of each other.
fig_hist, ax_hist = plt.subplots()
df['column_name'].hist(ax=ax_hist)
ax_hist.set(title='Histogram of column_name', xlabel='column_name', ylabel='Count')

fig_box, ax_box = plt.subplots()
sns.boxplot(x='column_name', data=df, ax=ax_box)
ax_box.set_title('Boxplot of column_name')

# Correlation heatmap
# Correlation is only defined for numeric data; selecting numeric/bool columns
# first avoids the error recent pandas raises when string columns are present.
numeric_df = df.select_dtypes(include=['number', 'bool'])
fig_corr, ax_corr = plt.subplots()
sns.heatmap(numeric_df.corr(), annot=True, ax=ax_corr)
ax_corr.set_title('Correlation heatmap');

# 3. Statistics & Probability

In [None]:
# Central tendency and spread of a single column.
column_values = df['column_name']
mean_val, median_val, std_dev = (
    column_values.mean(),
    column_values.median(),
    column_values.std(),
)

print("Mean:", mean_val, "Median:", median_val, "StdDev:", std_dev)

# 4. Feature Engineering

In [None]:
from sklearn.preprocessing import StandardScaler, LabelEncoder

# Standardize 'column_name' and store the result as 'scaled_column'.
# The double brackets pass a one-column DataFrame (2-D input) to the scaler.
# NOTE(review): the scaler is fitted on the full frame, before any
# train/test split — confirm this is acceptable for the intended use.
scaler = StandardScaler()
scaled_values = scaler.fit_transform(df[['column_name']])
df[['scaled_column']] = scaled_values

# Map each category in 'categorical_col' to an integer code.
# NOTE(review): scikit-learn documents LabelEncoder as a target (y) encoder;
# consider OrdinalEncoder / OneHotEncoder for input features.
encoder = LabelEncoder()
encoded_labels = encoder.fit_transform(df['categorical_col'])
df['encoded_col'] = encoded_labels

# 5. Model Building

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression

# Fixed seed so the train/test split (and everything computed from it) is
# identical on every Restart & Run All.
RANDOM_STATE = 42

# Features: every column except the target, restricted to numeric/bool dtypes.
# LogisticRegression cannot be fitted on raw string columns, so those are
# left out here; encode them into numeric columns first if they should be used.
X = df.drop('target', axis=1).select_dtypes(include=['number', 'bool'])
y = df['target']

# Hold out 20% of the rows for evaluation.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=RANDOM_STATE
)
model = LogisticRegression().fit(X_train, y_train)

# 6. Model Evaluation

In [None]:
from sklearn.metrics import accuracy_score, classification_report

# Predict on the held-out rows, then report overall accuracy followed by the
# per-class text report.
y_pred = model.predict(X_test)

accuracy = accuracy_score(y_test, y_pred)
print("Accuracy:", accuracy)

report = classification_report(y_test, y_pred)
print(report)

# 7. Cross Validation & Hyperparameter Tuning

In [None]:
from sklearn.model_selection import GridSearchCV

# Candidate values for C, each scored with 5-fold cross-validation on the
# training split.
param_grid = {'C': [0.1, 1, 10]}
base_estimator = LogisticRegression()
grid = GridSearchCV(estimator=base_estimator, param_grid=param_grid, cv=5)
grid.fit(X_train, y_train)
print("Best Parameters:", grid.best_params_)

# 8. NLP Basics

In [None]:
import nltk
from sklearn.feature_extraction.text import TfidfVectorizer

# Fetch the NLTK 'punkt' resource.
# NOTE(review): nothing else in this cell calls nltk — the vectorizer below is
# built with default arguments — so confirm whether this download is needed.
nltk.download('punkt')

# Two-sentence toy corpus turned into a TF-IDF matrix.
corpus = [
    "This is an example.",
    "Data Science is fun!",
]
vectorizer = TfidfVectorizer()
X_vec = vectorizer.fit_transform(corpus)

# Convert to a dense array so the weights can be printed.
dense_tfidf = X_vec.toarray()
print(dense_tfidf)

# 9. Time Series Analysis

In [None]:
# Parse 'Date' and make it the index.
# The guard keeps the cell re-runnable: after the first run 'Date' is the
# index rather than a column, so repeating the conversion would raise KeyError.
if df.index.name != 'Date':
    df['Date'] = pd.to_datetime(df['Date'])
    df.set_index('Date', inplace=True)

# Line plot of the column against the date index; the trailing ';' keeps the
# Axes repr out of the cell output.
ts_ax = df['column_name'].plot(figsize=(10, 4), title='Time Series Plot')
ts_ax.set_ylabel('column_name');