## MongoDB

### Setup

In [1]:
import pandas as pd
from pymongo import MongoClient
client = MongoClient('localhost',27017) 
db = client.apan5400

In [2]:
mongo_df = pd.read_csv('car_prices.csv')

### Create a collection and insert data

In [3]:
cars = db['collection_car']
db['cars'].drop()

In [4]:
cars = db['cars']

In [5]:
# Convert the DataFrame to a dictionary
cars.insert_many(mongo_df.to_dict('records'));

### Total vehicle production each year

In [6]:
pipeline = [
    {"$group": {"_id": "$year", "total_production": {"$sum": 1}}},
    {"$project": {"year": "$_id", "total_production": 1, "_id": 0}},
]

In [7]:
result = list(cars.aggregate(pipeline))
df1 = pd.DataFrame(result)
df1 = df1[['year', 'total_production']]
df1

Unnamed: 0,year,total_production
0,1998,2149
1,1997,1546
2,1989,20
3,1991,67
4,2014,81070
5,1983,1
6,2015,9437
7,1982,2
8,1994,392
9,1990,49


### Top 10 model that sold the most units over time

In [8]:
pipeline = [
    {"$group": {"_id": "$model", "model_count": {"$sum": 1}}},
    {"$match": {"model_count": {"$gt": 1000}}},
    {"$sort": {"model_count": -1}},
    {"$project": {"model": "$_id", "model_count": 1, "_id": 0}},
    {"$limit": 10}
]

In [9]:
result = list(cars.aggregate(pipeline))
df2 = pd.DataFrame(result)
df2 = df2[['model','model_count']]
df2

Unnamed: 0,model,model_count
0,Altima,19349
1,F-150,14479
2,Fusion,12946
3,Camry,12545
4,Escape,11861
5,,10399
6,Focus,10394
7,Accord,9127
8,3 Series,8204
9,Grand Caravan,7941


### Top 10 brand (make) that generated the most total sales

In [10]:
pipeline = [
    {"$group": {"_id": "$make", "total_sales": {"$sum": "$sellingprice"}}},
    {"$project": {"make": "$_id", "total_sales": 1, "_id": 0}},
    {"$sort": {"total_sales": -1}},
    {"$limit": 10}
]

In [11]:
result = list(cars.aggregate(pipeline))
df3 = pd.DataFrame(result)
df3 = df3[['make','total_sales']]
df3

Unnamed: 0,make,total_sales
0,Chevrolet,721003399.0
1,Nissan,633272955.0
2,Toyota,488392710.0
3,BMW,444254638.0
4,Mercedes-Benz,370083906.0
5,Infiniti,312489190.0
6,Honda,297645409.0
7,Lexus,240615055.0
8,Jeep,230583753.0
9,GMC,179905320.0


In [12]:
pipeline = [
    {"$match": {"make": {"$in": ["Nissan", "Toyota", "Honda"]}}},
    {"$group": {
        "_id": {"make": "$make", "year": "$year"},
        "total_units": {"$sum": 1},
        "total_sales": {"$sum": "$sellingprice"}
    }},
    {"$project": {
        "_id": 0,
        "make": "$_id.make",
        "year": "$_id.year",
        "total_units": 1,
        "total_sales": 1
    }},
    {"$sort": {"make": 1, "year": 1}}
]

In [13]:
result = list(cars.aggregate(pipeline))
df4 = pd.DataFrame(result)
pd.set_option('display.max_rows', None)
df4 = df4[['make','year','total_units','total_sales']]
df4

Unnamed: 0,make,year,total_units,total_sales
0,Honda,1990,10,4100.0
1,Honda,1991,10,3575.0
2,Honda,1992,22,13725.0
3,Honda,1993,26,16125.0
4,Honda,1994,68,46750.0
5,Honda,1995,70,54150.0
6,Honda,1996,88,68925.0
7,Honda,1997,197,182100.0
8,Honda,1998,283,351200.0
9,Honda,1999,310,456350.0


### Flask

In [14]:
import pandas as pd
!pip install Flask pymongo



In [15]:
df = pd.read_csv('car_prices.csv')

In [16]:
from flask import Flask, request, render_template_string
from pymongo import MongoClient

In [17]:
import json
# Convert the data frame into a list of JSON objects
json_list = df.to_json(orient='records')
json_list = json.loads(json_list)

In [18]:
len(json_list)

558837

In [19]:
# Write all JSON objects into a text (JSON string) file
with open('json_string', 'w') as json_string_file:
    json.dump(json_list, json_string_file)

In [20]:
# Print the number of records in the resulting file
with open('json_string', 'r') as file:
    json_file = json.load(file)
len(json_file)

558837

In [21]:
# Load JSON data into a DataFrame
json_df = pd.DataFrame(json_file)

In [22]:
app = Flask('5400_Group_project_1_Web')

In [23]:
# Select all BMW vehicles sold in 2014
df_bmw_2014 = json_df[(json_df['make'] == 'BMW') & (json_df['year'] == 2014)]
bmw_list = df_bmw_2014.values.tolist()

In [24]:
# router
@app.route('/bmw')
def bmw_cars():
    output = df_bmw_2014.to_html()
    return output

In [None]:
app.run(host='localhost', port=5001)

 * Serving Flask app '5400_Group_project_1_Web'
 * Debug mode: off


 * Running on http://localhost:5001
Press CTRL+C to quit
127.0.0.1 - - [17/Apr/2024 18:22:10] "GET / HTTP/1.1" 404 -
127.0.0.1 - - [17/Apr/2024 18:22:10] "GET /favicon.ico HTTP/1.1" 404 -
127.0.0.1 - - [17/Apr/2024 18:22:15] "GET /bmw HTTP/1.1" 200 -
