# Python core and general purpose libraries

## Numpy 
### (Numerical Operations, Arrays)

In [None]:
import numpy as np

# Build a 1-D array from a flat Python list.
# NumPy arrays are homogeneous (every element shares one dtype), which is what
# enables vectorized operations that outpace plain Python lists on large data.
arr_1d = np.array([1, 2, 3, 4, 5])
print("NumPy 1D Array:\n", arr_1d)

# Build a 2-D array (matrix) from a list of rows: 2 rows x 3 columns.
# The same idea extends to higher-dimensional data.
arr_2d = np.array(
    [
        [1, 2, 3],
        [4, 5, 6],
    ]
)
print("\nNumPy 2D Array:\n", arr_2d)

## Pandas
### (Data analysis, manipulation)

In [None]:
import pandas as pd

# Build a DataFrame from a dict: each key becomes a column name and each list
# supplies that column's values (all lists have the same length).
data = {
    'Name': ['Alice', 'Bob', 'Charlie', 'David'],
    'Age': [25, 30, 35, 28],
    'City': ['New York', 'London', 'Paris', 'Tokyo'],
}
df = pd.DataFrame(data=data)
print("\nPandas DataFrame:\n", df)
# A DataFrame is a labeled, tabular structure (think spreadsheet or SQL table)
# with tooling for cleaning, reshaping, analyzing, and plotting data.

# Pull out individual columns.
# Indexing with a column name returns that column as a Series;
# `.values` exposes the underlying array without the index.
print("\n'Name' column from DataFrame:\n", df['Name'])
print("\n'Age' column (values only) for DF:\n", df['Age'].values)

## Matplotlib
### (Plotting, Visualization)

In [None]:
import matplotlib.pyplot as plt

# Generate some data
x = np.linspace(0, 10, 100)  # 100 evenly spaced points from 0 to 10
y = np.sin(x)  # sine of every point in x

# Create a simple line plot
plt.figure(figsize=(8, 4))  # figure size in inches (width, height)
plt.plot(x, y, label='sin(x)', color='blue', linestyle='--')
plt.xlabel("X-axis")
plt.ylabel("Y-axis")  # must be ylabel: a second xlabel call would overwrite the x label
plt.title("Sine Wave Plot")
plt.legend()  # shows the 'sin(x)' label passed to plot()
plt.grid(True)
plt.show()

## Scikit-learn
### (Machine Learning Algorithms)

In [None]:
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score
from sklearn.datasets import load_iris # a sample dataset

# Load the sample data
iris = load_iris()
X = iris.data # features, capital X
y = iris.target # the labels to predict, lowercase y

# Split data into training and testing sets (20% held out; random_state makes the split reproducible)
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
# Holding out a test set lets us measure performance on unseen data and so detect overfitting.

# Create a Logistic Regression model
model = LogisticRegression(max_iter=200) # allow up to 200 solver iterations so the optimizer has room to converge
# Instantiating a machine learning model.

# Train the model
model.fit(X_train, y_train) # .fit() trains the model using the provided training data.
# Learns patterns from X_train to predict y_train

# Make predictions on the test set
y_pred = model.predict(X_test) # .predict() uses the trained model to make predictions on unseen data

# Evaluate the model's accuracy
accuracy = accuracy_score(y_test, y_pred)
# Leading "\n" prints a blank line first (the original "\S" was an invalid escape sequence).
print(f"\nSklearn Logistic Regression accuracy: {accuracy:.2f}") # Accuracy to 2 decimals

In [None]:
from sklearn.metrics import classification_report, confusion_matrix

# Show the raw predictions next to the ground-truth labels
print("\nPredicted labels:", y_pred)
print("Actual labels:   ", y_test)

# Tally how often each actual class was predicted as each class
cm = confusion_matrix(y_test, y_pred)
print("\nConfusion Matrix:\n", cm)

# Per-class metrics, with rows labeled by the dataset's class names
class_names = iris.target_names
report = classification_report(y_test, y_pred, target_names=class_names)
print("\nClassification Report:\n", report)

# Same matrix wrapped in a DataFrame so rows and columns carry the class names
df_cm = pd.DataFrame(data=cm, index=class_names, columns=class_names)
print("\nConfusion Matrix (labeled):\n", df_cm)

## Tensorflow
### (Deep learning platform, includes Keras)

In [None]:
import tensorflow as tf

# Define a simple sequential model
tf_model = tf.keras.Sequential([
    tf.keras.Input(shape=(4,)), # explicit input spec: 4 features per sample
    tf.keras.layers.Dense(units=10, activation='relu'), # hidden layer with 10 neurons
    tf.keras.layers.Dense(units=3, activation='softmax') # output layer for 3 classes
])
# keras.Sequential allows building neural networks layer by layer. Dense layers are fully connected.
# 'relu' is a common activation function. 'softmax' is for multi-class outputs (probabilities).
# The input shape is declared with keras.Input rather than an `input_shape=` argument on the
# first Dense layer, which newer Keras versions warn against.

# Compiling the model configures it for training
tf_model.compile(
    optimizer='adam', # optimizer defines how weights are updated
    loss='sparse_categorical_crossentropy', # loss measures the prediction error
    metrics=['accuracy'], # metrics reported during training and evaluation
)

# Provides a summary of the model architecture: layer types, output shapes, trainable parameters.
tf_model.summary()

For the TensorFlow output:
- Each Dense Layer is shown above.
- Dense(units=X) means X neurons, each connected to all inputs. 
- Each neuron has:
    - one weight per feature
    - one bias.
- Params = weights + biases (e.g. the first Dense layer here has 4 × 10 weights + 10 biases = 50 parameters)

## Pytorch
### (Flexible deep learning)

In [None]:
import torch
import torch.nn as nn # module provides building blocks for Neural Networks (NNs)
import torch.optim as optim

# Build a small 2x2 tensor from nested lists of floats.
# Tensors are PyTorch's core array type; nothing here moves it to a GPU.
pt_tensor = torch.tensor(
    [
        [1.0, 2.0],
        [3.0, 4.0],
    ]
)
print("\nPytorch Tensor:\n", pt_tensor)
print("\n2D tensor/matrix, shape is 2,2, type is a torch.float32 because of 1.")

# Minimal feed-forward network for binary classification.
# Subclasses nn.Module, the base class that PyTorch models derive from.
class SimpleNN(nn.Module):
    """Two-layer network: 10 inputs -> 5 hidden units (ReLU) -> 1 sigmoid output."""

    def __init__(self):
        """Create the layers; attribute names become the submodule names."""
        super().__init__()  # let nn.Module set itself up before layers are assigned
        self.fc1 = nn.Linear(10, 5)  # fully connected: 10 input features -> 5
        self.relu = nn.ReLU()  # non-linear activation
        self.fc2 = nn.Linear(5, 1)  # fully connected: 5 -> 1 output
        self.sigmoid = nn.Sigmoid()  # squashes the output into (0, 1)

    def forward(self, x):
        """Run x through fc1 -> ReLU -> fc2 -> sigmoid and return the result."""
        hidden = self.relu(self.fc1(x))  # (10 -> 5), then activation
        return self.sigmoid(self.fc2(hidden))  # (5 -> 1), then map to (0, 1)


In [None]:
# Instantiate the model (calls SimpleNN.__init__, which creates the layers)
pt_model = SimpleNN()
# Printing an nn.Module shows its registered submodules
print("\nPyTorch Model Architecture:\n", pt_model)
# PyTorch models are classes inheriting from nn.Module:
# __init__ defines the layers and forward defines the data flow through the network

## XGBoost
### (Fast, powerful boosting)

In [None]:
import xgboost as xgb
from sklearn.datasets import make_classification # Generate synthetic classification data

# Generate a synthetic binary-classification dataset (1000 samples, 20 features)
X_synth, y_synth = make_classification(n_samples=1000, n_features=20, n_classes=2, random_state=42)

# Hold out 20% of the samples for testing; random_state makes the split reproducible
X_train_xgb, X_test_xgb, y_train_xgb, y_test_xgb = train_test_split(X_synth, y_synth, test_size=0.2, random_state=42)

# Create an XGBoost classifier model
xgb_model = xgb.XGBClassifier(objective='binary:logistic', eval_metric='logloss', random_state=42)
# objective defines the loss function for binary classification
# use_label_encoder is deliberately not passed: the option is deprecated, and
# newer XGBoost releases warn that it is unused instead of silencing a warning.

# Train the XGBoost model
xgb_model.fit(X_train_xgb, y_train_xgb)
# .fit() trains the boosting model on training data
# XGBoost builds an ensemble of decision trees sequentially

# Predict
y_pred_xgb = xgb_model.predict(X_test_xgb)

# Evaluate accuracy
accuracy_xgb = accuracy_score(y_test_xgb, y_pred_xgb)
print(f"\nXGBoost Classifier Accuracy: {accuracy_xgb:.3f}")
# XGBoost is a highly efficient and powerful gradient boosting library