In [124]:
import csv
from pathlib import Path
from dotenv import load_dotenv
import os
import pymongo
import uuid
import random
import json


In [72]:
db_client = pymongo.MongoClient('mongodb://localhost:27017')
collections = db_client['db_ai_travel_planner']
tb_city = collections['tb_city']

In [73]:
def load_csv(filepath):
    data = []
    with open(filepath, 'r', newline='', encoding='utf-8') as csvfile:  # Handle encoding!
        reader = csv.reader(csvfile)  # Or csv.DictReader for dictionaries
        header = next(reader)  # Read the header row (if it exists)
        for row in reader:
            data.append(row)  # Or data.append(dict(zip(header, row))) for DictReader
    return header, data  # Return header and data

In [87]:
continent_map = {}  #key: country, value: continent
continents = {} #key: continent, value: 1
#read continent info
header, data = load_csv(Path("./countries.csv"))
for row in data:
    continent = row[12].replace('Americas', 'America').replace('Oceania', 'Australia').lower()
    if continent != '' and continent != 'Polar':
        continent_map[row[1]] = continent
#print(continent_map)

In [None]:
header, data = load_csv(Path("./city_country_shortened.csv"))
for row in data:
    if row[1] in continent_map:
        #print(row[0] + ',' + row[1] + ',' + continent_map[row[1]])
        a = 1
    else:
        print('Not found continent for this country ------ ' + row[1]) #replace country name in file countries.csv -> run code again to update the map


In [106]:
continents = ['asia', 'america', 'africa', 'australia', 'europe']

In [107]:
#select most commented cities for HOMEPAGE
MIN_REVIEW_COUNT = 100
cities_with_image = []  #list of city UUIDs that has image Urls

def generate_cities_for_home():
    home_cities = {}
    #top banner (randomly pick a city in europe) ~ 100 cities
    query = {'continent':'europe', 'error': None, 'review': {'$gt': MIN_REVIEW_COUNT}}
    top_review_cities = tb_city.find(query).sort({'review': -1}).limit(100)
    total_docs = tb_city.count_documents(query)
    rand_top_index = random.randint(0, total_docs)
    top_banner_city = top_review_cities[rand_top_index]
    cities_with_image.append(top_banner_city['uuid'])
    home_cities['top_banner'] = top_banner_city['uuid']
    #print('Top banner city: ' + top_banner_city['name'] + ' country: ' + top_banner_city['country'])
    #random pick (randomly pick a city in america or asia) ~ 100 cities
    query = {'$or': [{'continent':'america'}, {'continent':'asia'}], 'error': None, 'review': {'$gt': MIN_REVIEW_COUNT}}
    random_pick_cities = tb_city.find(query).sort({'review': -1}).limit(100)
    total_docs = tb_city.count_documents(query)
    rand_top_index = random.randint(0, total_docs)
    rand_pick_city = random_pick_cities[rand_top_index]
    cities_with_image.append(rand_pick_city['uuid'])
    home_cities['random_pick'] = rand_pick_city['uuid']
    #print('Random pick city: ' + rand_pick_city['name'] + ' country: ' + rand_pick_city['country'])
    #each continent get randomly 20 cities (get 20 cities, sort by review count)
    for continent in continents:
        cities_in_continent = []
        query = {'continent':continent, 'error': None, 'review': {'$gt': MIN_REVIEW_COUNT}}
        pipeline = [
            {"$match": query},
            {"$sample": {"size": 20}}  #X random documents
        ]
        cities = list(tb_city.aggregate(pipeline))
        for city in cities:
            cities_with_image.append(city['uuid'])
            cities_in_continent.append(city['uuid'])
        home_cities[continent] = cities_in_continent
    #
    return home_cities
#test
home_cities = generate_cities_for_home()
#print(home_cities)

In [108]:
len(cities_with_image)

99

In [123]:
#export ALL cities that had data in db -> the file size should limit to 2 MB (reduce key name)
all_cities = []
city_continent_map = {} #key: city, value: continent
for continent in continents:
    num_city_in_continent = 0
    db_city = tb_city.find({'continent':continent, 'error': None})
    for document in db_city:
        #count how many cities in a continent
        if document['name'] not in city_continent_map:
            num_city_in_continent += 1
        #save this city into the list
        if document['uuid'] == home_cities['top_banner'] or document['uuid'] == home_cities['top_banner']:
            #get full info
            all_cities.append({
                "id" : document['uuid'],
                "n" : document['name'],
                "c" : document['country'],
                "ci" : document['city_id'],
                "r" : document['continent'],
                "rc" : document['review'],
                "i" : document['img'],
                'li': document['imgUrls'],
                "wi" : document['wonder_id']
            })
        elif document['uuid'] in cities_with_image:
            #get info with 1 image, DO NOT save other images
            all_cities.append({
                "id" : document['uuid'],
                "n" : document['name'],
                "c" : document['country'],
                "ci" : document['city_id'],
                "r" : document['continent'],
                "rc" : document['review'],
                "i" : document['img'],
                "wi" : document['wonder_id']
            })
        else:
            #get simple data for searching only (no image or other relevant info)
            all_cities.append({
                "n" : document['name'],
                "c" : document['country'],
                "cid" : document['city_id'],
                "wid" : document['wonder_id']
            })
    print(continent + ' : ' + str(num_city_in_continent))
#finish
print('All cities count: ' + str(len(all_cities)))

asia : 1281
america : 1012
africa : 364
australia : 26
europe : 575
All cities count: 3258


In [126]:
#export to json file
try:
    with open('city_db.json', 'w', encoding='utf-8') as f:  # Use utf-8 encoding
            json.dump(all_cities, f, separators=(',', ':'))  #no break line or extra spaces
except (TypeError, OSError, json.JSONDecodeError) as e:  # Catch potential errors
    print(f"Error exporting to JSON: {e}")