# **Basic Python**

## 1.a 
Write a function F that takes a single argument n. This function should print a sequence of n strings as described in the examples below: <br>
Ex: **n = 3**

--A--<br>
-BAB-<br>
CBABC<br>

**n = 4**

---A---<br>
--BAB--<br>
-CBABC-<br>
DCBABCD

In [None]:
def F(n):
    """Print an n-row pyramid of letters, padded on both sides with dashes.

    Row i (counting from 0) shows the letters from chr(65 + i) down to 'A'
    and back up again, centred in a field of 2*n - 1 characters.
    Nothing is printed when n is 0 or negative.
    """
    width = 2 * n - 1
    for row in range(n):
        rising = [chr(ord('A') + step) for step in range(row + 1)]
        # Mirror the run around 'A' (e.g. C B + A B C) without repeating 'A'.
        mirrored = ''.join(rising[:0:-1] + rising)
        # The leftover width is always even, so the padding is symmetric.
        print(mirrored.center(width, '-'))


# Tests: print the pattern for two different sizes
for size in (10, 6):
    F(size)

## 1.b
Make a function F that takes only one argument, a dictionary(dict) d.
The keys of d are integers and the values of d are a tuple of type (x (int), y (int)).
You must print out the dict in the format "-key-, -x-, -y-" with each entry in a new line. Print it for each of the three sorted orders, by key values ascending, by x values descending, by y values ascending.

In [None]:
def F(d : dict):
    """Print the entries of d three times, once per requested ordering.

    Each entry is printed as "-key-, -x-, -y-" where (x, y) is the value
    stored under key. The orderings are: keys ascending, x descending and
    y ascending; every listing is preceded by a heading line.
    """
    def show(heading, key=None, reverse=False):
        # Heading first, then the entries in the requested order.
        print(heading)
        for k, pair in sorted(d.items(), key=key, reverse=reverse):
            print(f"-{k}-, -{pair[0]}-, -{pair[1]}-")

    show("Sorted by keys (ascending):")
    show("\nSorted by x values (descending):", key=lambda entry: entry[1][0], reverse=True)
    show("\nSorted by y values (ascending):", key=lambda entry: entry[1][1])

# Tests: two sample dictionaries mapping int keys to (x, y) tuples
for sample in (
    {1 : (1, 2), 2 : (-1, 4), 5 : (-4, 3), 4 : (2, 3)},
    {-8 : (4, 2), 6 : (-3, 4), 7 : (2, 1), 5 : (9, -10)},
):
    F(sample)

## 2 Working with Student Records

Use the data in **student_records.csv** to complete the given tasks. Do not include any external libraries. Use a Python dictionary if required.

### Reference
- [Python Dictionaries](https://www.w3schools.com/python/python_dictionaries.asp)


#### 2.a: open the student_records.csv file and print out the first 10 rows

In [None]:
# NOTE(review): the task text asks for no external libraries, but every cell
# of section 2 operates on a pandas DataFrame named `student_data` - confirm
# that pandas is acceptable here.
import pandas as pd

# Load the records once; the cells for 2.b-2.e reuse this DataFrame.
student_data = pd.read_csv('student_records.csv')

# 2.a: show the first 10 rows
print(student_data.head(10))

#### 2.b: Print out the total credits and calculate the CPI of each student.
CPI is the credit-weighted average of the grade points earned in core courses and electives (letter grades are converted to numbers: AP, AA = 10, AB = 9, BB = 8, BC = 7, CC = 6)

In [None]:
# Letter grade -> grade points, as given in the task statement.
grade_to_points = {"AP": 10, "AA": 10, "AB": 9, "BB": 8, "BC": 7, "CC": 6}

# Grades missing from the table become NaN.
# NOTE(review): such rows still add to a student's total credits but add no
# points to the CPI numerator (sums skip NaN) - confirm this is intended.
student_data['grade_points'] = student_data['grade'].map(grade_to_points)

# Credit-weighted grade points per course, then one vectorised sum per student.
# This avoids running a Python lambda per group through groupby.apply.
per_course = student_data[['roll_number', 'credit']].assign(
    weighted_points=student_data['credit'] * student_data['grade_points']
)
totals = per_course.groupby('roll_number')[['credit', 'weighted_points']].sum()

# One row per roll number with columns: roll_number, total_credits, CPI.
student_summary = totals.rename(columns={'credit': 'total_credits'})
student_summary['CPI'] = student_summary.pop('weighted_points') / student_summary['total_credits']
student_summary = student_summary.reset_index()

print(student_summary.head(10))


#### 2.c: Print out the roll numbers of all students who meet the graduation requirements 
At least 20 credits of core courses, 15 credits of department electives, 10 credits of flexible electives and 5 credits of hasmed electives

In [None]:
def meets_graduation_requirements(group):
    """Tell whether one student's course rows satisfy every credit minimum.

    group holds the rows of a single student and must provide the columns
    'course_type' and 'credit'. The minimums are 20 core, 15 department
    elective, 10 flexible elective and 5 hasmed elective credits.
    """
    def earned(course_type):
        # Total credits of the rows tagged with the given course type.
        matching = group['course_type'] == course_type
        return group.loc[matching, 'credit'].sum()

    return (earned('core') >= 20
            and earned('department_elective') >= 15
            and earned('flexible_elective') >= 10
            and earned('hasmed_elective') >= 5)

# Keep the rows of every student whose credit totals pass the check, then
# list each qualifying roll number once.
graduating_students = (
    student_data
    .groupby('roll_number')
    .filter(meets_graduation_requirements)
)

print(graduating_students['roll_number'].unique())

#### 2.d: Print out the roll numbers of all students who completed a minor 
At least 10 credits with minor tag in a specific department

In [None]:
def completes_minor(group):
    """Tell whether one student's rows contain at least 10 'minor' credits.

    group holds the rows of a single student and must provide the columns
    'course_type' and 'credit'.
    """
    # NOTE(review): every 'minor' row is counted together; the task text says
    # "in a specific department" - confirm whether a per-department total is
    # required.
    is_minor = group['course_type'] == 'minor'
    minor_credits = group.loc[is_minor, 'credit'].sum()
    return minor_credits >= 10

# Keep the rows of every student who passes the minor check, then list each
# qualifying roll number once.
students_with_minor = (
    student_data
    .groupby('roll_number')
    .filter(completes_minor)
)

print(students_with_minor['roll_number'].unique())

#### 2.e: Print out the roll numbers of all students who completed an honours
At least 10 credits with the honours tag and 20 core credits

In [None]:
def earns_honors(group):
    """Tell whether one student's rows qualify for honours.

    Requires at least 10 credits tagged 'honours' and at least 20 credits
    tagged 'core'. group must provide the columns 'course_type' and 'credit'.
    """
    def earned(course_type):
        # Total credits of the rows tagged with the given course type.
        matching = group['course_type'] == course_type
        return group.loc[matching, 'credit'].sum()

    return earned('honours') >= 10 and earned('core') >= 20

# Keep the rows of every student who passes the honours check, then list each
# qualifying roll number once.
students_with_honors = (
    student_data
    .groupby('roll_number')
    .filter(earns_honors)
)

print(students_with_honors['roll_number'].unique())

# **SciPy** 

## Part A
Minimize the function $f(x, y) = 2(x - y - 3)^2 + 4(x + 2y + 1)^4$.<br>
With the constraints : $ x - y \ge -3, (x + 2)^2 + (y + 1)^2 \le 5$ <br>
Using scipy.optimize.minimize with constraints. (Hint: Look at the examples in the official documentation)

In [None]:
import numpy as np
from scipy.optimize import minimize

def objective_function(vars):
    """Evaluate f(x, y) = 2(x - y - 3)^2 + 4(x + 2y + 1)^4 at vars = (x, y)."""
    x, y = vars
    quadratic_term = 2 * (x - y - 3)**2
    quartic_term = 4 * (x + 2 * y + 1)**4
    return quadratic_term + quartic_term

def constraint1(vars):
    """Slack of the constraint x - y >= -3; non-negative when it is satisfied."""
    x, y = vars
    slack = x - y + 3
    return slack

def constraint2(vars):
    """Slack of (x + 2)^2 + (y + 1)^2 <= 5; non-negative when it is satisfied."""
    x, y = vars
    squared_distance = (x + 2)**2 + (y + 1)**2
    return 5 - squared_distance

# Starting point for the solver. (0, 0) satisfies both constraints: it lies
# on the boundary of the disc constraint and strictly inside the linear one.
initial_guess = [0, 0]

# SciPy treats an 'ineq' constraint as fun(x) >= 0, which is why both
# constraint functions return a slack value.
constraints = [
    {'type': 'ineq', 'fun': constraint1},
    {'type': 'ineq', 'fun': constraint2}
]


result = minimize(objective_function, initial_guess, constraints=constraints, method='SLSQP')

# Do not present a failed run as the optimum: report the solver status first.
if not result.success:
    print("Warning: optimisation did not converge:", result.message)

print("Optimal values of x and y:", result.x)
print("Minimum value of the objective function:", result.fun)

## Part B
Evaluate the line integral of the function $f(x, y) = x^2 + y^4$ along the circle $ x^2 + y^2 = 3 $ anticlockwise (scalar integral, not vector). You must use scipy for integration but you may use parameterization

In [None]:
import numpy as np
from scipy.integrate import quad

def f(t):
    """Evaluate x^2 + y^4 at the point of the circle x^2 + y^2 = 3 with angle t.

    The circle is parameterised as x = sqrt(3) cos t, y = sqrt(3) sin t.
    """
    radius = np.sqrt(3)
    x, y = radius * np.cos(t), radius * np.sin(t)
    return x**2 + y**4

def ds(t):
    """Arc-length factor |r'(t)| for r(t) = (sqrt(3) cos t, sqrt(3) sin t)."""
    radius = np.sqrt(3)
    dx_dt, dy_dt = -radius * np.sin(t), radius * np.cos(t)
    speed_squared = dx_dt**2 + dy_dt**2
    return np.sqrt(speed_squared)

def integrand(t):
    """Integrand of the scalar line integral: f on the curve times |r'(t)|."""
    value_on_curve = f(t)
    arc_length_factor = ds(t)
    return value_on_curve * arc_length_factor

# Integrate over one full revolution of the parameter, t from 0 to 2*pi.
t_start, t_end = 0, 2 * np.pi
result, error = quad(integrand, t_start, t_end)

print("Line integral result:", result)
print("Estimated error:", error)

# **Numpy**

### Read Lisan_Al_Gaib.pdf for problem description and complete the following functions

In [None]:
import time # to time the execution
import numpy as np
import matplotlib.pyplot as plt

In [None]:
### TODO 1
### Load data from data_path
### Check the input file spice_locations.txt to understand the Data Format
### Return : np array of size Nx2
def load_data(data_path):
    """Load comma-separated numeric rows from data_path as a 2-D array.

    Parameters:
        data_path: path of the text file, one comma-separated point per line.

    Returns:
        np.ndarray with one row per line of the file. ndmin=2 keeps the
        result two-dimensional even when the file holds a single line, which
        would otherwise come back as a flat 1-D array.
    """
    return np.loadtxt(data_path, delimiter=",", ndmin=2)

In [None]:
### TODO 2.1
### If init_centers is None, initialize the centers by selecting K data points at random without replacement
### Else, use the centers provided in init_centers
### Return : np array of size Kx2
def initialise_centers(data, K, init_centers=None):
    """Return the K starting centers for the clustering.

    Parameters:
        data: array of points, one row per point.
        K: number of centers to pick when none are supplied.
        init_centers: optional centers supplied by the caller.

    Returns:
        np.ndarray of centers. Caller-supplied centers are passed through
        np.asarray, so a nested list is accepted and an existing ndarray is
        returned unchanged (no copy). Otherwise K distinct rows of data are
        drawn at random without replacement.

    Raises:
        ValueError: from np.random.choice when K exceeds the number of rows.
    """
    if init_centers is not None:
        return np.asarray(init_centers)

    data = np.asarray(data)
    chosen_rows = np.random.choice(len(data), K, replace=False)
    return data[chosen_rows]

In [None]:
### TODO 2.2
### Initialize the labels to all ones to size (N,) where N is the number of data points
### Return : np array of size N
def initialise_labels(data):
    """Return the initial label vector: one integer label per data point.

    Every label starts at 1, as the specification above requires (the
    previous implementation returned zeros).

    Returns:
        np.ndarray of shape (N,) and integer dtype, where N = len(data).
    """
    return np.ones(len(data), dtype=int)

In [None]:
### TODO 3.1 : E step
### For Each data point, find the distance to each center
### Return : np array of size NxK
def calculate_distances(data, centers):
    """Euclidean distance from every data point to every center.

    Returns an array with one row per data point and one column per center.
    """
    # Broadcasting (N, 1, D) against (K, D) gives all pairwise offsets (N, K, D).
    offsets = data[:, np.newaxis, :] - centers
    squared_lengths = (offsets * offsets).sum(axis=2)
    return np.sqrt(squared_lengths)

In [None]:
### TODO 3.2 : E step
### For Each data point, assign the label of the nearest center
### Return : np array of size N
def update_labels(distances):
    """Label each data point with the column index of its smallest distance.

    distances has one row per point and one column per center; ties go to
    the lowest center index.
    """
    nearest_center = np.argmin(distances, axis=1)
    return nearest_center

In [None]:
### TODO 4 : M step
### Update the centers to the mean of the data points assigned to it
### Return : np array of size Kx2
def update_centers(data, labels, K):
    """Move every center to the mean of the data points assigned to it.

    Parameters:
        data: array of points, one row per point.
        labels: array with the cluster index assigned to each point.
        K: number of clusters.

    Returns:
        np.ndarray with one row per cluster.

    A cluster with no assigned points has no mean. Taking the mean of the
    empty selection gives NaN, and np.argmin treats NaN as the minimum, so a
    single NaN center would corrupt every later assignment. Each empty
    cluster is therefore re-seeded with the data point that lies farthest
    from its nearest non-empty center (one distinct point per empty cluster).
    If no cluster has any point, or there are more empty clusters than data
    points, the centers that cannot be re-seeded stay NaN.
    """
    data = np.asarray(data)
    labels = np.asarray(labels)

    centers = np.full((K, data.shape[1]), np.nan)
    occupied = np.zeros(K, dtype=bool)
    for k in range(K):
        members = data[labels == k]
        if len(members):
            centers[k] = members.mean(axis=0)
            occupied[k] = True

    vacant = np.flatnonzero(~occupied)
    if vacant.size and occupied.any():
        # Distance from every point to its nearest surviving center.
        offsets = data[:, np.newaxis, :] - centers[occupied]
        nearest = np.sqrt((offsets ** 2).sum(axis=2)).min(axis=1)
        farthest_first = np.argsort(nearest)[::-1]
        # zip stops at the shorter sequence, so no point is used twice.
        for k, point_index in zip(vacant, farthest_first):
            centers[k] = data[point_index]

    return centers


In [None]:
### TODO 6 : Check convergence
### Check if the labels have changed from the previous iteration
### Return : True / False
def check_termination(labels1, labels2):
    """Return True when the two label arrays agree in every position."""
    unchanged = labels1 == labels2
    return np.all(unchanged)

In [None]:
### DON'T CHANGE ANYTHING IN THE FOLLOWING FUNCTION
def kmeans(data_path:str, K:int, init_centers):
    '''
    Cluster the points stored in data_path by alternating label assignment
    and center updates until the labels stop changing.

    Input :
        data_path (type str): path to the file containing the data
        K (type int): number of clusters
        init_centers (type numpy.ndarray): initial centers. shape = (K, 2) or None
    Output :
        centers (type numpy.ndarray): final centers. shape = (K, 2)
        labels (type numpy.ndarray): label of each data point. shape = (N,)
        time (type float): time taken by the algorithm to converge in seconds
    N is the number of data points each of shape (2,)

    Only the iteration loop is timed: loading the data and initialising the
    centers and labels happen before the clock starts.
    '''
    data = load_data(data_path)    
    centers = initialise_centers(data, K, init_centers)
    labels = initialise_labels(data)

    start_time = time.time() # Time stamp 

    while True:
        # E step: distances to the current centers, then nearest-center labels
        distances = calculate_distances(data, centers)
        labels_new = update_labels(distances)
        # M step: recompute the centers from the new labels
        centers = update_centers(data, labels_new, K)
        # Stop when the labels did not change; the centers were already
        # refreshed above, so they match the labels that are returned.
        if check_termination(labels, labels_new): break
        else: labels = labels_new
 
    end_time = time.time() # Time stamp after the algorithm ends
    return centers, labels, end_time - start_time 

In [None]:
### TODO 7
def visualise(data_path, labels, centers):
    """Scatter-plot the clustered points with their centers and save the figure.

    The points are reloaded from data_path and coloured by labels; centers
    are drawn on top. The plot is written to 'kmeans.png' and the pyplot
    module is returned.
    """
    points = load_data(data_path)
    point_x, point_y = points[:, 0], points[:, 1]

    # Data points, coloured by their cluster label
    plt.scatter(point_x, point_y, c=labels, s=50, cmap='viridis')
    # Cluster centers as larger, semi-transparent black markers
    center_x, center_y = centers[:, 0], centers[:, 1]
    plt.scatter(center_x, center_y, c='black', s=200, alpha=0.5)

    plt.title('K-means clustering')
    plt.xlabel('Longitude')
    plt.ylabel('Latitude')

    plt.savefig('kmeans.png')

    ## DO NOT CHANGE THE FOLLOWING LINE
    return plt

In [None]:
### After you have completed the above functions, run the following code to generate the plot
data_path = 'spice_locations.txt'
K = 2
init_centers = None  # None: the starting centers are drawn at random from the data
centers, labels, time_taken = kmeans(data_path, K, init_centers)
print('Time taken for the algorithm to converge:', time_taken)
visualise(data_path, labels, centers)