In [None]:
import requests
import zipfile
import os
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Step 1: Download the ZIP file
zip_url = "https://files.consumerfinance.gov/ccdb/complaints.csv.zip"
zip_filename = "complaints.zip"

# Stream the archive to disk in chunks instead of buffering the whole body in
# memory, and fail immediately on an HTTP error so an error page is never
# saved as if it were the ZIP file.
with requests.get(zip_url, stream=True, timeout=60) as response:
    response.raise_for_status()
    with open(zip_filename, 'wb') as file:
        for chunk in response.iter_content(chunk_size=1024 * 1024):
            file.write(chunk)

# Step 2: Unzip the file
with zipfile.ZipFile(zip_filename, 'r') as zip_ref:
    zip_ref.extractall()  # Extracts to the current directory
    # Ask the archive what it contains rather than listing the working
    # directory, which may hold unrelated CSV files.
    extracted_files = zip_ref.namelist()

# Determine the name of the extracted CSV file
csv_filename = next((f for f in extracted_files if f.lower().endswith('.csv')), None)
if csv_filename is None:
    raise FileNotFoundError(f"No CSV file found inside {zip_filename}")

# Step 3: Load the CSV into a DataFrame
data = pd.read_csv(csv_filename, low_memory=False)  # Read the data

# Step 4: Explore the data
# Display the first few rows
print(data.head())

# Data summary statistics
print(data.describe())

# Data types and information
# DataFrame.info() prints its report itself and returns None, so wrapping it
# in print() would append a stray "None" line to the output.
data.info()

# Step 5: Check for missing values
missing_values = data.isnull().sum()
print("Missing Values:\n", missing_values)

# Step 6: Data visualization and insights
# Plot the distribution of a specific column (example: 'Product')
sns.countplot(y='Product', data=data, order=data['Product'].value_counts().index)
plt.title('Complaints by Product')
plt.show()

# Check the number of complaints by state
sns.countplot(y='State', data=data, order=data['State'].value_counts().index[:10])  # Top 10 states
plt.title('Complaints by State')
plt.show()

# Explore relationships between categorical data (example: 'Product' and 'Company')
# Cross-tabulating against every distinct company yields one stacked segment
# and one legend entry per company, which is slow to draw and unreadable.
# Restrict the chart to the ten companies with the most complaints.
top_companies = data['Company'].value_counts().index[:10]
top_company_rows = data[data['Company'].isin(top_companies)]
pd.crosstab(top_company_rows['Product'], top_company_rows['Company']).plot(
    kind='bar', stacked=True, figsize=(10,6)
)
plt.title('Complaints by Product and Company (Top 10 Companies)')
plt.xlabel('Product')
plt.ylabel('Number of Complaints')
plt.show()

# Step 7: Key Insights
# Product that appears most often in the complaints
product_counts = data['Product'].value_counts()
most_common_product = product_counts.idxmax()
print("Most Common Product:", most_common_product)

# Company that appears most often in the complaints
company_counts = data['Company'].value_counts()
most_complained_company = company_counts.idxmax()
print("Company with Most Complaints:", most_complained_company)

# Yearly complaint volume: parse the received date, then count complaint IDs per year
data['Date received'] = pd.to_datetime(data['Date received'])
received_year = data['Date received'].dt.year
complaints_per_year = data.groupby(received_year)['Complaint ID'].count()
complaints_per_year.plot(kind='line', figsize=(10,6))
plt.title('Complaint Trends Over Time')
plt.xlabel('Year')
plt.ylabel('Number of Complaints')
plt.show()


In [1]:
def find_length_of_lcis(nums):
    """Return the length of the longest run of strictly increasing adjacent elements.

    Args:
        nums: Indexable sequence of mutually comparable values.

    Returns:
        0 for an empty sequence, otherwise the length (at least 1) of the
        longest contiguous stretch in which each element is greater than the
        one before it.
    """
    if not nums:
        return 0

    best = 1  # longest run seen so far
    run = 1   # length of the run ending at the current position

    for idx in range(1, len(nums)):
        # Extend the run on a strict increase, otherwise start a new one here.
        run = run + 1 if nums[idx] > nums[idx - 1] else 1
        # Recording the best on every step removes the need for a final check.
        if run > best:
            best = run

    return best


In [2]:
from functools import cmp_to_key

def largest_number(nums):
    """Order the values so that their concatenation forms the largest number.

    Each value is converted with ``str()``; the function is intended for
    non-negative integers.

    Args:
        nums: Iterable of values to arrange.

    Returns:
        The concatenated result as a string. Returns ``'0'`` when the result
        would start with ``'0'`` (for example, when every input is zero) and
        ``''`` when ``nums`` is empty.
    """
    def compare(x, y):
        # x sorts before y when placing it first gives the larger concatenation.
        order1 = x + y
        order2 = y + x
        if order1 > order2:
            return -1
        if order1 < order2:
            return 1
        return 0

    # Convert all numbers to strings for concatenation
    str_nums = list(map(str, nums))

    # Sort the numbers with the custom comparator
    str_nums.sort(key=cmp_to_key(compare))

    # Join the sorted numbers into a single string (named so it does not
    # shadow the function itself)
    joined = ''.join(str_nums)

    # startswith() is safe on an empty string, whereas indexing [0] would
    # raise IndexError for empty input.
    return '0' if joined.startswith('0') else joined


In [4]:
import json
import csv

# Read the JSON file
# Name the encoding explicitly: without it open() uses the platform default,
# which is not UTF-8 on every system and can mis-decode non-ASCII JSON text.
with open("DT A1 sample_json (1) (1).json", "r", encoding="utf-8") as json_file:
    json_data = json.load(json_file)

# Open a CSV file to write the extracted data
# newline='' lets the csv module control line endings; the explicit encoding
# keeps the output identical across platforms.
with open("servlets.csv", "w", newline='', encoding="utf-8") as csv_file:
    csv_writer = csv.writer(csv_file)

    # Write the CSV header
    csv_writer.writerow(["servlet-name", "servlet-class"])

    # Iterate over the JSON data to find "servlet-name" and "servlet-class"
    if "web-app" in json_data and "servlet" in json_data["web-app"]:
        for servlet in json_data["web-app"]["servlet"]:
            # Missing keys fall back to an empty string so every row has two columns
            servlet_name = servlet.get("servlet-name", "")
            servlet_class = servlet.get("servlet-class", "")

            # Write the data to the CSV file
            csv_writer.writerow([servlet_name, servlet_class])
