In [1]:
import csv
from pathlib import Path
from dotenv import load_dotenv
import os
import pymongo
import uuid
import random
import json


In [2]:
# Open the connection to the local MongoDB server; later cells reuse these handles.
db_client = pymongo.MongoClient('mongodb://localhost:27017')
# NOTE: despite its name, `collections` is the *database* handle, not a collection.
collections = db_client.get_database('db_ai_travel_planner')
# Collection holding one document per city.
tb_city = collections.get_collection('tb_city')

In [3]:
def load_csv(filepath):
    """Read a UTF-8 CSV file and split it into its header row and data rows.

    Args:
        filepath: Location of the CSV file (``str`` or ``pathlib.Path``).

    Returns:
        A ``(header, data)`` tuple. ``header`` is the first row as a list of
        strings; ``data`` is a list with every remaining row, each one a list
        of strings. An empty file yields ``([], [])``.

    Raises:
        OSError: If the file cannot be opened.
    """
    # newline='' lets the csv module deal with line breaks inside quoted fields.
    with open(filepath, 'r', newline='', encoding='utf-8') as csvfile:
        reader = csv.reader(csvfile)
        # The [] default avoids a bare StopIteration when the file is empty.
        header = next(reader, [])
        data = list(reader)
    return header, data

In [4]:
continent_map = {}  # key: country (row[1]), value: normalized lower-case continent name
continents = {}  # placeholder only; a later cell rebinds this name to the list of continents
# Build the country -> continent lookup from countries.csv.
header, data = load_csv(Path("./countries.csv"))
for row in data:
    # row[12] is used as the continent/region column (presumably matching the
    # layout of countries.csv). Normalize its names to the ones used elsewhere
    # in this notebook ('america', 'australia', ...).
    continent = row[12].replace('Americas', 'America').replace('Oceania', 'Australia').lower()
    # The value is already lower-cased here, so the exclusion must be spelled
    # 'polar'; comparing against 'Polar' could never match and let polar
    # entries slip into the map.
    if continent != '' and continent != 'polar':
        continent_map[row[1]] = continent

In [5]:
# Sanity check: report every country in the city list that has no continent.
# To resolve a miss, fix the country name in countries.csv and rerun the cell
# that builds continent_map.
header, data = load_csv(Path("./city_country_shortened.csv"))
for row in data:
    # row[1] is looked up as the country name (row[0] is presumably the city).
    if row[1] not in continent_map:
        print('Not found continent for this country ------ ' + row[1])


In [6]:
# Continent names as stored in the `continent` field of the city documents.
# Later cells iterate this list, so the order determines processing order.
continents = [
    'asia',
    'america',
    'africa',
    'australia',
    'europe',
]

In [7]:
#select most commented cities for HOMEPAGE
MIN_REVIEW_COUNT = 100  # a city needs strictly more reviews than this to be featured
TOP_CITY_POOL_SIZE = 100  # how many of the most-reviewed cities a featured city is drawn from
CITIES_PER_CONTINENT = 20  # how many random cities are listed for each continent


def _pick_random_top_city(query):
    """Return one random document among the most-reviewed cities matching ``query``.

    The matching documents are sorted by ``review`` (descending), cut to the
    first ``TOP_CITY_POOL_SIZE`` and one of them is chosen uniformly.

    Args:
        query: MongoDB filter applied to ``tb_city``.

    Returns:
        The chosen city document.

    Raises:
        IndexError: If no document matches ``query``.
    """
    # Materialize the pool so the random pick can never index past its end
    # (random.randint's upper bound is inclusive, which made that possible).
    top_cities = list(tb_city.find(query).sort('review', -1).limit(TOP_CITY_POOL_SIZE))
    if not top_cities:
        raise IndexError(f'No city matches query {query!r}')
    return random.choice(top_cities)


def generate_cities_for_home():
    """Choose the cities shown on the homepage.

    Picks one "top banner" city from Europe, one "random pick" city from
    America or Asia, and up to ``CITIES_PER_CONTINENT`` randomly sampled cities
    for every entry of ``continents``. Only documents whose ``error`` is
    ``None`` and whose ``review`` exceeds ``MIN_REVIEW_COUNT`` are eligible.

    Returns:
        A ``(home_cities, cities_with_image)`` tuple.

        ``home_cities`` maps ``'top_banner'`` and ``'random_pick'`` to a
        ``{'n': name, 'c': country}`` dict, and each continent name to a list
        of such dicts.

        ``cities_with_image`` is the list of ``uuid`` values of every chosen
        city; it can contain duplicates when a city is chosen more than once.

    Raises:
        IndexError: If no eligible city exists for the top banner or the
            random pick.
    """
    cities_with_image = []  #list of city UUIDs that has image Urls
    home_cities = {}

    #top banner (randomly pick a city in europe) ~ 100 cities
    top_banner_city = _pick_random_top_city(
        {'continent': 'europe', 'error': None, 'review': {'$gt': MIN_REVIEW_COUNT}}
    )
    cities_with_image.append(top_banner_city['uuid'])
    home_cities['top_banner'] = {'n': top_banner_city['name'], 'c': top_banner_city['country']}

    #random pick (randomly pick a city in america or asia) ~ 100 cities
    rand_pick_city = _pick_random_top_city(
        {
            'continent': {'$in': ['america', 'asia']},
            'error': None,
            'review': {'$gt': MIN_REVIEW_COUNT},
        }
    )
    cities_with_image.append(rand_pick_city['uuid'])
    home_cities['random_pick'] = {'n': rand_pick_city['name'], 'c': rand_pick_city['country']}

    #each continent gets up to CITIES_PER_CONTINENT randomly sampled cities
    for continent in continents:
        cities_in_continent = []
        pipeline = [
            {"$match": {'continent': continent, 'error': None, 'review': {'$gt': MIN_REVIEW_COUNT}}},
            {"$sample": {"size": CITIES_PER_CONTINENT}},  #X random documents
        ]
        for city in tb_city.aggregate(pipeline):
            cities_with_image.append(city['uuid'])
            cities_in_continent.append({'n': city['name'], 'c': city['country']})
        home_cities[continent] = cities_in_continent

    return home_cities, cities_with_image


In [8]:
##export ALL cities that had data in db -> the file size should limit to 2 MB (reduce key name)
continent_keys = {  #map for reducing exported file (currently not written to the export)
    'asia': 1,
    'america': 2,
    'africa': 3,
    'australia': 4,
    'europe': 5
}


def _city_record(document, with_image=False, with_gallery=False):
    """Build the compact export dict for one city document.

    Args:
        document: City document read from ``tb_city``.
        with_image: Also export the review count (``rc``) and main image (``i``).
        with_gallery: Also export the full image URL list (``li``).

    Returns:
        A dict with short keys; ``n``, ``c``, ``ci`` and ``wi`` are always present.
    """
    record = {
        "n": document['name'],
        "c": document['country'],
        "ci": document['city_id'],
        "wi": document['wonder_id'],
    }
    if with_image:
        record["rc"] = document['review']
        record["i"] = document['img']
    if with_gallery:
        record['li'] = document['imgUrls']
    return record


#1. generate random cities to show in homepage
home_cities, cities_with_image = generate_cities_for_home()
print(home_cities)
print(len(cities_with_image))

# The two featured cities (top banner, random pick) are exported with full info.
featured_cities = {
    (home_cities[slot]['n'], home_cities[slot]['c'])
    for slot in ('top_banner', 'random_pick')
}
# Set copy for O(1) membership tests inside the per-document loop.
image_city_uuids = set(cities_with_image)

#export all cities into json file
all_cities = [] #data to export
# key: (city name, country), value: continent. Used to count each distinct city
# once per run; a bare name is not enough because different countries share
# city names.
city_continent_map = {}
for continent in continents:
    num_city_in_continent = 0
    for document in tb_city.find({'continent': continent, 'error': None}):
        city_key = (document['name'], document['country'])
        #count how many distinct cities in a continent
        if city_key not in city_continent_map:
            # Record the city so the "not seen yet" check above is meaningful.
            city_continent_map[city_key] = continent
            num_city_in_continent += 1
        #save this city into the list
        if city_key in featured_cities:
            #get full info
            all_cities.append(_city_record(document, with_image=True, with_gallery=True))
        elif document['uuid'] in image_city_uuids:
            #get info with 1 image, DO NOT save other images
            all_cities.append(_city_record(document, with_image=True))
        else:
            #get simple data for searching only (no image or other relevant info)
            all_cities.append(_city_record(document))
    print(continent + ' : ' + str(num_city_in_continent))
#finish
print('All cities count: ' + str(len(all_cities)))
#export to json file
try:
    with open('city_db.json', 'w', encoding='utf-8') as f:
        # Compact separators and raw UTF-8 (instead of \uXXXX escapes) keep the
        # file small; the content parses to the same JSON values.
        json.dump(all_cities, f, separators=(',', ':'), ensure_ascii=False)
except (TypeError, ValueError, OSError) as e:
    # json.dump signals unserializable data with TypeError/ValueError; OSError
    # covers file-system failures. Kept best-effort: report and continue.
    print(f"Error exporting to JSON: {e}")

{'top_banner': {'n': 'Wiesbaden', 'c': 'Germany'}, 'random_pick': {'n': 'Jiamusi', 'c': 'China'}, 'asia': [{'n': 'Jiangmen', 'c': 'China'}, {'n': 'Yokkaichi', 'c': 'Japan'}, {'n': 'Longjing', 'c': 'China'}, {'n': 'Sakado', 'c': 'Japan'}, {'n': 'Bacoor', 'c': 'Philippines'}, {'n': 'Chongqing', 'c': 'China'}, {'n': 'Yima', 'c': 'China'}, {'n': 'Leiyang', 'c': 'China'}, {'n': 'Bulacan', 'c': 'Philippines'}, {'n': 'Compostela', 'c': 'Philippines'}, {'n': 'Kumagaya', 'c': 'Japan'}, {'n': 'Nagasaki', 'c': 'Japan'}, {'n': 'Santa Ana', 'c': 'Philippines'}, {'n': 'Taihe', 'c': 'China'}, {'n': 'Beipiao', 'c': 'China'}, {'n': 'Cibinong', 'c': 'Indonesia'}, {'n': 'Settsu', 'c': 'Japan'}, {'n': 'Yangquan', 'c': 'China'}, {'n': 'Zhongshan', 'c': 'China'}, {'n': 'Didim', 'c': 'Turkey'}], 'america': [{'n': 'Westland', 'c': 'United States'}, {'n': 'Bronx', 'c': 'United States'}, {'n': 'Itapevi', 'c': 'Brazil'}, {'n': 'Chilliwack', 'c': 'Canada'}, {'n': 'Oceanside', 'c': 'United States'}, {'n': 'Mogi da