In [None]:
import requests
from bs4 import BeautifulSoup
import csv

def scrape_cnn_articles():
    """Scrape (title, url) pairs from the CNN World landing page.

    Every ``<article>`` element is inspected; one that contains both an
    ``<h1 class="headline__text">`` and an anchor with an ``href`` yields
    one result.

    Returns:
        list[tuple[str, str]]: ``(title, absolute_url)`` pairs in page order.
        The list is empty when no element matches the selectors.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            4xx/5xx response.
    """
    # Local import keeps this cell self-contained; urljoin is standard library.
    from urllib.parse import urljoin

    url = "https://www.cnn.com/world"
    # A timeout stops a stalled connection from hanging the notebook forever.
    response = requests.get(url, timeout=10)
    # Fail loudly on an error status rather than parsing an error page and
    # reporting "no articles".
    response.raise_for_status()
    soup = BeautifulSoup(response.content, 'html.parser')

    articles = []

    # NOTE(review): the recorded run of this cell printed "No articles found",
    # so these selectors may not match the live markup -- TODO confirm the
    # tag/class names against the current page.
    for container in soup.find_all('article'):
        headline_tag = container.find('h1', class_='headline__text')
        # href=True skips anchors without an href, which would otherwise
        # raise KeyError on the lookup below.
        link_tag = container.find('a', href=True)
        if not (headline_tag and link_tag):
            continue
        title = headline_tag.get_text().strip()
        # urljoin resolves root-relative, relative and protocol-relative
        # links against the page URL and leaves absolute URLs untouched.
        link = urljoin(url, link_tag['href'])
        articles.append((title, link))

    return articles

def save_to_csv(articles, filename):
    """Write article rows to a CSV file under a ``Title,URL`` header.

    Args:
        articles: iterable of row sequences, e.g. ``(title, url)`` tuples.
        filename: path of the file to create or overwrite (UTF-8).
    """
    header_row = ['Title', 'URL']
    # newline='' lets the csv module control line endings itself.
    with open(filename, mode='w', newline='', encoding='utf-8') as out_file:
        csv_writer = csv.writer(out_file)
        csv_writer.writerow(header_row)
        for article_row in articles:
            csv_writer.writerow(article_row)

if __name__ == "__main__":
    # Scrape once, then either report the empty result or persist the rows.
    articles = scrape_cnn_articles()
    if not articles:
        print("No articles found")
    else:
        save_to_csv(articles, 'cnn_articles.csv')
        print(f"Saved {len(articles)} articles to cnn_articles.csv")


No articles found


Q2

In [None]:
import pandas as pd
import re


# Compiled once rather than on every call. Anchors are omitted because the
# function uses fullmatch(), which already requires the whole string to match.
_EMAIL_PATTERN = re.compile(r'[\w\.-]+@[\w\.-]+\.\w+')


def is_valid_email(email):
    """Return True when ``email`` looks like ``local@domain.tld``.

    This is a loose syntactic check only: word characters, dots and hyphens
    on both sides of a single ``@``, followed by a dot and a word-character
    suffix.

    Args:
        email: value to check. Anything that is not a ``str`` (for example
            ``None`` or the float NaN pandas uses for a missing cell) is
            treated as invalid instead of raising ``TypeError``.

    Returns:
        bool: True if the entire string matches the pattern.
    """
    if not isinstance(email, str):
        return False
    # fullmatch (unlike match with a trailing "$") rejects a trailing newline.
    return _EMAIL_PATTERN.fullmatch(email) is not None

input_file = 'input_users.csv'
output_file = 'cleaned_users.csv'

# Load the raw rows and drop repeated user_id values in one chained step
# (drop_duplicates keeps the first occurrence by default).
df = pd.read_csv(input_file).drop_duplicates(subset='user_id')

# Keep only the rows whose email passes is_valid_email.
df = df[df['email'].apply(is_valid_email)]

# Persist the cleaned rows without the pandas index column.
df.to_csv(output_file, index=False)

print(f"Cleaned data has been written to {output_file}")


Q3

In [None]:
from django.db import models
from django.utils import timezone
from django.db.models import Sum
from datetime import timedelta

class Customer(models.Model):
    """A customer record; ``Order`` rows reference it through a foreign key."""
    # Display name, limited to 255 characters.
    name = models.CharField(max_length=255)
    # Email address; unique=True means no two customers may share one.
    email = models.EmailField(unique=True)


class Order(models.Model):
    """An order placed by a ``Customer``, with a status and a total amount."""

    # One-letter codes stored in ``status`` and their display labels.
    STATUS_CHOICES = (
        ('P', 'Pending'),
        ('C', 'Completed'),
        ('F', 'Failed'),
    )
    # Deleting a customer deletes that customer's orders (CASCADE).
    customer = models.ForeignKey(Customer, on_delete=models.CASCADE)
    order_date = models.DateTimeField()
    status = models.CharField(max_length=1, choices=STATUS_CHOICES)
    total_amount = models.DecimalField(max_digits=10, decimal_places=2)

    @staticmethod
    def top_customers(last_n_months=6, top_n=5):
        """Return the biggest spenders on completed orders in a recent window.

        Args:
            last_n_months: window length; a month is approximated as 30 days.
            top_n: maximum number of rows to return.

        Returns:
            A sliced queryset of dicts with keys ``customer`` (the customer
            id) and ``total_spent``, ordered by ``total_spent`` descending.
        """
        lookback = timedelta(days=30 * last_n_months)
        cutoff = timezone.now() - lookback

        # Only completed ('C') orders on or after the cutoff count.
        recent_completed = Order.objects.filter(order_date__gte=cutoff, status='C')
        # values() before annotate() groups the sum per customer.
        spend_by_customer = recent_completed.values('customer').annotate(
            total_spent=Sum('total_amount')
        )
        ranked = spend_by_customer.order_by('-total_spent')
        return ranked[:top_n]


How to use it: a Django view that renders the top customers

In [None]:
from django.shortcuts import render
from .models import Order

def top_customers_view(request):
    """Render ``top_customers.html`` with the default top-customer ranking.

    The template receives a single context variable, ``top_customers``,
    holding the result of ``Order.top_customers()`` called with its defaults.
    """
    return render(
        request,
        'top_customers.html',
        {'top_customers': Order.top_customers()},
    )


In [None]:
## Template

<!-- top_customers.html -->

<!DOCTYPE html>
<html>
<head>
    <title>Top Customers</title>
</head>
<body>
    <!-- The heading text matches the defaults of Order.top_customers (6 months, top 5). -->
    <h1>Top 5 Customers Who Spent the Most in the Last 6 Months</h1>
    <ul>
        <!-- top_customers comes from the view context; each entry exposes
             "customer" (the customer id) and "total_spent". -->
        {% for customer in top_customers %}
            <li>Customer ID: {{ customer.customer }}, Total Spent: ${{ customer.total_spent }}</li>
        {% endfor %}
    </ul>
</body>
</html>


Q4

In [None]:
import time
from collections import defaultdict, deque
from threading import Lock

class RateLimiter:
    """Per-user sliding-window rate limiter.

    Each user may make at most ``max_requests`` allowed requests within any
    ``time_window``-second span. Timestamps of allowed requests are kept in
    a per-user deque and pruned lazily on each call.

    Args:
        max_requests: maximum number of allowed requests per window.
        time_window: window length in seconds.
        clock: zero-argument callable returning the current time in seconds.
            Defaults to ``time.monotonic`` so that system clock adjustments
            cannot shrink or stretch the window; inject a fake clock in tests.

    Note:
        Timestamps stored in ``requests`` are readings of ``clock`` and are
        therefore not Unix epoch times under the default clock.
    """

    def __init__(self, max_requests, time_window, clock=time.monotonic):
        self.max_requests = max_requests
        self.time_window = time_window
        self.requests = defaultdict(deque)
        self.locks = defaultdict(Lock)
        self._clock = clock
        # Serialises creation/lookup of per-user locks: defaultdict's
        # create-on-miss is not documented as atomic, and two threads must
        # never end up holding different locks for the same user.
        self._locks_guard = Lock()

    def allow_request(self, user_id):
        """Record and allow a request for ``user_id`` if under the limit.

        Returns:
            bool: True if the request was allowed (and recorded), False if
            the user already has ``max_requests`` requests in the window.
        """
        with self._locks_guard:
            user_lock = self.locks[user_id]

        with user_lock:
            # Read the clock while holding the lock so timestamps are
            # appended in non-decreasing order; pruning relies on that.
            now = self._clock()
            window_start = now - self.time_window
            user_requests = self.requests[user_id]

            # Drop timestamps that have fallen out of the window.
            while user_requests and user_requests[0] < window_start:
                user_requests.popleft()

            if len(user_requests) >= self.max_requests:
                return False

            user_requests.append(now)
            return True

In [None]:
# Demo: allow at most 5 requests per 60-second window for each user.
rate_limiter = RateLimiter(max_requests=5, time_window=60)
user_id = 'user_123'

print("Request allowed" if rate_limiter.allow_request(user_id) else "Request denied")


Q5


In [None]:
from typing import List, Dict, Callable, Any

def aggregate_data(data: List[Dict], key: str, aggregator: Callable[[List[Any]], Any],
                   value_key: str = "value") -> Dict[Any, Any]:
    """Group records by one field and aggregate another field per group.

    Args:
        data: records to aggregate.
        key: field whose value identifies the group. Records lacking this
            field are grouped under ``None``.
        aggregator: callable that receives the list of collected values for
            one group and returns the aggregate.
        value_key: field whose values are collected for aggregation.
            Defaults to ``"value"``.

    Returns:
        Mapping of group key to aggregated value, in first-seen group order.

    Raises:
        KeyError: if a record has no ``value_key`` field.
    """
    grouped_data: Dict[Any, List[Any]] = {}
    for item in data:
        # Collect the value field, not the grouping field: aggregating the
        # group key itself would just hand each group copies of its own key.
        grouped_data.setdefault(item.get(key), []).append(item[value_key])

    return {group: aggregator(values) for group, values in grouped_data.items()}


if __name__ == "__main__":
    from statistics import mean

    data = [
        {"category": "A", "value": 10},
        {"category": "B", "value": 20},
        {"category": "A", "value": 30},
        {"category": "B", "value": 40},
        {"category": "A", "value": 50},
    ]

    # Group by category; the "value" field is what gets aggregated.
    key = "category"

    def sum_aggregator(values: List[Any]) -> Any:
        """Total of the values in one group."""
        return sum(values)

    def mean_aggregator(values: List[Any]) -> Any:
        """Arithmetic mean of the values in one group."""
        return mean(values)

    result_sum = aggregate_data(data, key, sum_aggregator)
    print(f"Sum Aggregation: {result_sum}")

    result_mean = aggregate_data(data, key, mean_aggregator)
    print(f"Mean Aggregation: {result_mean}")


Q6


In [2]:
def find_duplicate(nums):
    """Find the repeated value in ``nums`` using Floyd's cycle detection.

    The list is walked as a linked structure in which index ``i`` points to
    index ``nums[i]``; the walk starts from the value stored at index 0.

    Assumes every value is a valid index into ``nums`` and that a repeated
    value exists (e.g. n + 1 integers drawn from 1..n) -- inputs outside
    that contract are not validated.

    Args:
        nums: list of integers.

    Returns:
        int: the value at which the two walks meet in phase two.
    """
    start = nums[0]

    # Phase 1: take the first step for both walkers, then keep advancing
    # (slow by one hop, fast by two) until they land on the same value.
    slow = nums[start]
    fast = nums[nums[start]]
    while slow != fast:
        slow = nums[slow]
        fast = nums[nums[fast]]

    # Phase 2: restart one walker from the beginning; moving both one hop
    # at a time, they meet at the entry of the cycle.
    entry = start
    while entry != fast:
        entry, fast = nums[entry], nums[fast]

    return entry


# Demo input: five values where 2 appears twice.
nums = [1, 3, 4, 2, 2]
print(find_duplicate(nums))


2
