# module01_prerequisites


## bash.py
bash.py
Explanation of Bash
Bash is a Unix shell and command language. It's used for scripting and command-line operations.
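In Python, we can run shell commands using the subprocess module. Note that subprocess's shell=True uses the system default shell (/bin/sh on POSIX), which is not necessarily Bash, so Bash-only syntax may not work.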


In [None]:
import subprocess  # Import the subprocess module to run external commands
import sys  # Import sys module (though not used in this example, commonly needed for system operations)


In [None]:
def run_bash_command(command):
    """
    Run a shell command and return its output as a string.

    The command is passed to the platform's default shell (``shell=True``),
    so pipes, redirection and variable expansion work. Because the string is
    interpreted by a shell, only pass trusted input.

    NOTE: on POSIX the default shell is /bin/sh, which is not necessarily
    Bash; Bash-only syntax may therefore fail.

    Args:
        command: Command line to execute.

    Returns:
        The stripped stdout when the command exits with status 0.
        Otherwise a string starting with ``"Error: "`` followed by the
        stripped stderr, or by the exit status when stderr is empty.
        If the command could not be run at all, a string starting with
        ``"Exception: "`` is returned instead of raising.
    """
    try:
        # Only the call that can actually raise stays inside the try block
        result = subprocess.run(command, shell=True, capture_output=True, text=True)
    except Exception as e:  # Best-effort helper: report the failure as text
        return f"Exception: {str(e)}"

    if result.returncode == 0:
        return result.stdout.strip()

    error_text = result.stderr.strip()
    if not error_text:
        # Commands such as `exit 3` fail silently; say at least how they failed
        error_text = f"command exited with status {result.returncode}"
    return f"Error: {error_text}"


In [None]:
# Examples of common Bash commands
# Example 1: long listing of the working directory, hidden entries included.
# Header and result are emitted by a single print call, one per line.
output = run_bash_command("ls -la")
print("1. List files in current directory:", output, sep="\n")


In [None]:
# Example 2: ask the shell for the current working directory.
# Header and result are emitted by a single print call, one per line.
output = run_bash_command("pwd")
print("\n2. Check current working directory:", output, sep="\n")


In [None]:
# Example 3: have the shell echo a fixed, single-quoted message.
# Header and result are emitted by a single print call, one per line.
output = run_bash_command("echo 'Hello from Bash!'")
print("\n3. Echo a message:", output, sep="\n")


In [None]:
# Example 4: query kernel and system details with `uname -a`.
# Header and result are emitted by a single print call, one per line.
output = run_bash_command("uname -a")
print("\n4. Check system information:", output, sep="\n")


Bash scripting concepts:
- Variables: var="value"; echo $var
- Loops: for i in {1..5}; do echo $i; done
- Conditionals: if [ condition ]; then ... fi
- Functions: function_name() { commands; }
- Pipes: command1 | command2
- Redirection: command > file.txt


## numpy_basics.py
numpy_basics.py
Explanation of NumPy
NumPy is a fundamental package for scientific computing in Python.
It provides support for large, multi-dimensional arrays and matrices,
along with mathematical functions to operate on these arrays.


In [None]:
import numpy as np  # Import NumPy with the standard alias 'np'


In [None]:
# Creating arrays
print("Creating Arrays:")
array1d = np.array([1, 2, 3, 4, 5])  # 1-D array built from a flat list
array2d = np.array([[1, 2, 3], [4, 5, 6]])  # 2x3 array built from nested lists
zeros_array = np.zeros((3, 3))  # 3x3 array filled with zeros
ones_array = np.ones((2, 4))  # 2x4 array filled with ones
# A seeded Generator replaces the legacy global np.random.rand so that the
# tutorial prints the same "random" values on every run.
random_array = np.random.default_rng(42).random((3, 2))  # 3x2 uniform samples in [0, 1)


In [None]:
print("1D array:", array1d)  # Label and values fit on one line
print("2D array:\n", array2d)  # "\n" moves the matrix to the line below its label
print("Zeros array:\n", zeros_array)  # Same layout for the zero-filled array
print("Ones array:\n", ones_array)  # Same layout for the one-filled array
print("Random array:\n", random_array)  # Same layout for the random array


In [None]:
# Array operations
# Two equal-length 1-D operands, bound in a single tuple assignment
print("\nArray Operations:")
a, b = np.array([1, 2, 3]), np.array([4, 5, 6])


In [None]:
print("Addition:", a + b)  # + on arrays adds element by element
print("Multiplication:", a * b)  # * is element-wise as well, not a matrix product
print("Dot product:", np.dot(a, b))  # For two 1-D arrays np.dot returns one scalar (the inner product)


In [None]:
# Matrix operations
# Two 2x2 operands, written one row per line so the layout mirrors the matrix
matrix1 = np.array([
    [1, 2],
    [3, 4],
])
matrix2 = np.array([
    [5, 6],
    [7, 8],
])


In [None]:
print("\nMatrix Operations:")
print("Matrix 1:\n", matrix1)
print("Matrix 2:\n", matrix2)
# For 2-D operands the @ operator is the preferred spelling of the matrix
# product; np.dot gives the same result but is the older, overloaded form.
print("Matrix multiplication:\n", matrix1 @ matrix2)  # Rows-by-columns product, not element-wise


In [None]:
# Statistical operations
# Draw the sample from a seeded Generator (instead of the legacy global
# np.random.randn) so the printed statistics are identical on every run.
data = np.random.default_rng(42).standard_normal(1000)  # 1000 draws from the standard normal distribution
print("\nStatistical Operations:")
print("Mean:", np.mean(data))  # Arithmetic mean of the sample
print("Standard deviation:", np.std(data))  # Spread of the sample around its mean
print("Min:", np.min(data))  # Smallest draw
print("Max:", np.max(data))  # Largest draw


In [None]:
# Indexing and slicing
print("\nIndexing and Slicing:")
arr = np.arange(10)  # Integers 0 through 9
print("Original array:", arr)
print("First 5 elements:", arr[:5])  # Omitted start defaults to 0, so this shows indices 0-4
print("Elements from index 2 to 7:", arr[2:8])  # Stop index 8 is excluded, so the last element shown is index 7
print("Every other element:", arr[::2])  # Step of 2 keeps indices 0, 2, 4, 6, 8


In [None]:
# Reshaping
# Lay the same 12 values out as 3 rows of 4, using the function form of reshape
print("\nReshaping:")
arr_1d = np.arange(12)
arr_2d = np.reshape(arr_1d, (3, 4))
print("1D array:", arr_1d)
print("Reshaped to 2D:\n", arr_2d)


Key NumPy concepts:
- ndarray: N-dimensional array object
- Vectorized operations: Element-wise operations without loops
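- Broadcasting: arrays with different but compatible shapes are combined as if the smaller one were stretched to match the larger (e.g. adding a scalar or a row vector to every row of a matrix)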
- Universal functions (ufuncs): Fast element-wise operations
- Linear algebra, FFT, random number generation


## scikit_learn.py
scikit_learn.py
Explanation of Scikit-learn
Scikit-learn is an open-source machine learning library for Python.
It provides simple and efficient tools for data mining and data analysis.


In [None]:
from sklearn.datasets import make_classification  # Import function to generate synthetic classification datasets
from sklearn.model_selection import train_test_split  # Import function to split data into train/test sets
from sklearn.linear_model import LogisticRegression  # Import logistic regression classifier
from sklearn.metrics import accuracy_score, classification_report  # Import metrics for model evaluation
from sklearn.preprocessing import StandardScaler  # Import standard scaler for feature normalization
from sklearn.pipeline import Pipeline  # Import pipeline for chaining preprocessing and model steps


In [None]:
# Generate sample data
# Synthetic classification problem: 1000 rows, 20 features of which 10 are
# informative; the fixed random_state keeps the data identical between runs.
X, y = make_classification(
    n_samples=1000,
    n_features=20,
    n_informative=10,
    random_state=42,
)


In [None]:
# Split data
# Hold out 20% of the rows for testing; the fixed random_state keeps the
# split identical between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)


In [None]:
# Create a pipeline with scaling and logistic regression
# Steps run in list order: features are standardized first, and the
# classifier is then trained on the scaled values.
pipeline = Pipeline(
    steps=[
        ('scaler', StandardScaler()),
        ('classifier', LogisticRegression(random_state=42)),
    ]
)


In [None]:
# Train the model
pipeline.fit(X_train, y_train)  # Fit the scaler and then the classifier, using the training split only


In [None]:
# Make predictions
y_pred = pipeline.predict(X_test)  # Predicted labels for the held-out test split


In [None]:
# Evaluate
# Share of test rows whose predicted label equals the true label
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Accuracy: {accuracy:.3f}")


In [None]:
print("\nClassification Report:")  # Header for the per-class metrics
print(classification_report(y_test, y_pred))  # Text table of precision, recall, F1-score and support per class


In [None]:
# Example of cross-validation
from sklearn.model_selection import cross_val_score  # Import cross-validation function


In [None]:
scores = cross_val_score(pipeline, X, y, cv=5)  # Perform 5-fold cross-validation on the full dataset
print(f"\nCross-validation scores: {scores}")  # Print the 5 accuracy scores from cross-validation
print(f"Mean CV score: {scores.mean():.3f} (+/- {scores.std() * 2:.3f})")  # Print mean and 95% confidence interval


Key components of scikit-learn:
- Datasets: make_classification, load_iris, etc.
- Preprocessing: StandardScaler, MinMaxScaler, etc.
- Model selection: train_test_split, cross_val_score, GridSearchCV
- Supervised learning: LinearRegression, RandomForestClassifier, SVC (support vector machines)
- Unsupervised learning: KMeans, PCA
- Metrics: accuracy_score, confusion_matrix, etc.


## sql.py
sql.py
Explanation of SQL (Structured Query Language)
SQL is used to communicate with relational databases.
It allows creating, reading, updating, and deleting data.


In [None]:
import sqlite3  # Import SQLite3 module for database operations


In [None]:
def create_database():
    """
    Build a throwaway SQLite database pre-populated with demo users.

    The database lives in memory only. A ``users`` table (id, name, age,
    email) is created, three rows are inserted, and the insert is committed.

    Returns:
        tuple: ``(connection, cursor)`` for the still-open database. The
        caller is responsible for closing the connection.
    """
    # Rows to seed, in (name, age, email) order; the insert does not supply an id
    seed_rows = [
        ('Alice', 25, 'alice@example.com'),
        ('Bob', 30, 'bob@example.com'),
        ('Charlie', 35, 'charlie@example.com'),
    ]
    insert_sql = 'INSERT INTO users (name, age, email) VALUES (?, ?, ?)'

    connection = sqlite3.connect(':memory:')
    db_cursor = connection.cursor()

    # Schema first, then the seed rows through one parameterized bulk insert
    db_cursor.execute('''
        CREATE TABLE users (
            id INTEGER PRIMARY KEY,
            name TEXT NOT NULL,
            age INTEGER,
            email TEXT
        )
    ''')
    db_cursor.executemany(insert_sql, seed_rows)
    connection.commit()

    return connection, db_cursor

In [None]:
# Open the demo database once; the example cells below all use this
# connection and cursor.
conn, cursor = create_database()

In [None]:
def execute_query(cursor, query, params=None):
    """
    Run one SQL statement on ``cursor`` and fetch every resulting row.

    Args:
        cursor: Cursor object exposing ``execute`` and ``fetchall``.
        query: SQL text, optionally containing placeholders.
        params: Values to bind to the placeholders. When omitted or empty,
            the statement is executed without a parameter argument.

    Returns:
        Whatever ``cursor.fetchall()`` yields after the statement has run.
    """
    # Pass the parameter argument only when there is something to bind
    call_args = (query, params) if params else (query,)
    cursor.execute(*call_args)
    return cursor.fetchall()


In [None]:
# Examples of SQL operations
print("1. SELECT all users:")
results = execute_query(cursor, "SELECT * FROM users")  # No WHERE clause: every row, all columns
for row in results:
    print(row)  # One line of output per fetched row


In [None]:
print("\n2. SELECT users older than 28:")
results = execute_query(cursor, "SELECT name, age FROM users WHERE age > ?", (28,))  # 28 is bound to the ? placeholder
for row in results:
    print(row)  # The name and age columns of each row that passed the filter


In [None]:
print("\n3. UPDATE user age:")
cursor.execute("UPDATE users SET age = ? WHERE name = ?", (26, 'Alice'))  # Set Alice's age to 26
conn.commit()  # Make the change permanent for this connection
# Verify the change with a bound parameter rather than a quoted literal,
# the same way the UPDATE above binds its values.
results = execute_query(cursor, "SELECT name, age FROM users WHERE name = ?", ('Alice',))
print(results)


In [None]:
print("\n4. DELETE a user:")
# Bind the name through a placeholder instead of embedding it in the SQL text
cursor.execute("DELETE FROM users WHERE name = ?", ('Charlie',))
conn.commit()  # Make the deletion permanent for this connection
results = execute_query(cursor, "SELECT COUNT(*) FROM users")  # Single row holding the row count
print(f"Remaining users: {results[0][0]}")  # First column of the first row is the count


In [None]:
# Close connection
conn.close()  # The database was opened with ':memory:', so its contents are discarded once the connection is closed


SQL concepts:
- DDL: CREATE, ALTER, DROP (Data Definition Language)
- DML: SELECT, INSERT, UPDATE, DELETE (Data Manipulation Language)
- DCL: GRANT, REVOKE (Data Control Language)
- TCL: COMMIT, ROLLBACK (Transaction Control Language)
