# In [1]:
from http.server import BaseHTTPRequestHandler, HTTPServer
from pyspark.sql import SparkSession
from pyspark.sql.functions import col
import os
from glob import glob
import json

class HTTPRequestHandler(BaseHTTPRequestHandler):
    """Serve the country with the fewest recorded COVID cases as JSON.

    ``GET /country_with_minimum_cases`` reads every ``*.csv`` file in
    ``data_directory`` with Spark and answers with
    ``{"country_with_minimum_cases": <country>}``. Any other path receives a
    plain-text 404.
    """

    # Directory scanned for ``*.csv`` input files. Kept as a class attribute
    # so a subclass can point the handler at a different data set.
    data_directory = "/Users/saumya/Documents/spark-project/"

    def do_GET(self):
        """Handle a GET request for the single supported endpoint.

        Responds with:
          * 200 and the JSON result when a country could be determined,
          * 404 (JSON error) when there is no usable case data,
          * 500 (JSON error) when reading or querying the data fails,
          * 404 (plain text) for any other path.
        """
        if self.path != '/country_with_minimum_cases':
            # Send 404 response for other paths
            self._send_body(404, 'text/plain', b'404 Not Found')
            return

        try:
            min_cases_country = self._find_min_cases_country()
        except Exception as exc:
            # Request boundary: without this the client would get no HTTP
            # response at all when Spark or the input data misbehaves.
            self.log_error("Failed to compute minimum-cases country: %r", exc)
            self._send_json(
                500, {'error': 'Failed to compute country with minimum cases'}
            )
            return

        if min_cases_country is None:
            self._send_json(404, {'error': 'No COVID case data available'})
            return

        self._send_json(200, {'country_with_minimum_cases': min_cases_country})

    def _find_min_cases_country(self):
        """Return the country with the smallest ``cases`` value, or ``None``.

        ``None`` is returned when the directory holds no CSV files or when no
        row has a non-null ``cases`` value. Errors raised by Spark (for
        example a file lacking a ``country`` or ``cases`` column) propagate to
        the caller.
        """
        # Find all CSV files in the directory
        file_paths = glob(os.path.join(self.data_directory, "*.csv"))
        if not file_paths:
            # Nothing to read; avoid starting a Spark session for no reason.
            return None

        spark = SparkSession.builder \
            .appName("Country with Minimum COVID Cases") \
            .getOrCreate()
        try:
            merged_df = None
            for file_path in file_paths:
                # Keep only the two needed columns, selected by name:
                # DataFrame.union matches columns by position, so files with
                # a different column order would otherwise be merged wrongly.
                df = spark.read.csv(file_path, header=True, inferSchema=True) \
                    .select("country", "cases")
                merged_df = df if merged_df is None else merged_df.union(df)

            # Ascending sort places NULLs first in Spark, so rows without a
            # case count must be dropped or they would win the "minimum".
            row = merged_df \
                .filter(col("cases").isNotNull()) \
                .orderBy(col("cases")) \
                .select("country") \
                .first()
            return None if row is None else row[0]
        finally:
            # Stop the SparkSession even when reading or querying fails.
            spark.stop()

    def _send_json(self, status, payload):
        """Send *payload* serialized as a JSON response with *status*."""
        self._send_body(status, 'application/json', json.dumps(payload).encode())

    def _send_body(self, status, content_type, body):
        """Send a complete response: status line, headers and *body* bytes."""
        self.send_response(status)
        self.send_header('Content-type', content_type)
        # Explicit length lets clients detect the end of the body reliably.
        self.send_header('Content-Length', str(len(body)))
        self.end_headers()
        self.wfile.write(body)

def run_server(port=8004):
    """Run the HTTP server on *port* until it is interrupted.

    Binds to all interfaces (empty host string) and blocks in
    ``serve_forever``. The listening socket is always closed on exit so the
    port can be reused immediately, e.g. when the cell or script is rerun.

    Args:
        port: TCP port to listen on.
    """
    server_address = ('', port)
    # The context manager calls server_close() on exit, releasing the port
    # even when serve_forever() is interrupted or raises.
    with HTTPServer(server_address, HTTPRequestHandler) as httpd:
        print(f'Starting server on port {port}...')
        try:
            httpd.serve_forever()
        except KeyboardInterrupt:
            # Ctrl+C / kernel interrupt is the normal way to stop the server.
            print('Stopping server...')

# Start the server with the default port only when this file is executed
# directly, not when it is imported as a module.
if __name__ == '__main__':
    run_server()


# Output: Starting server on port 8004...
