In [2]:
import pandas as pd
import folium
from folium.plugins import HeatMap
import glob
import zipfile
import os


zip_file_path = "/content/archive.zip"

extracted_folder_path = "/content/extracted_json_files"

if not os.path.exists(extracted_folder_path):
    os.makedirs(extracted_folder_path)

print("Extracting zip file...")
with zipfile.ZipFile(zip_file_path, 'r') as zip_ref:
    zip_ref.extractall(extracted_folder_path)

print("Loading JSON files...")

json_files = glob.glob(os.path.join(extracted_folder_path, "*.json"))
df_list = []

for file in json_files:
    df = pd.read_json(file)
    df_list.append(df)

# Combine all JSON files into one DataFrame
df = pd.concat(df_list, ignore_index=True)

print("Cleaning Data...")

df = df[["latitude", "longitude"]]


df = df.dropna(subset=["latitude", "longitude"])
df = df[(df["latitude"].between(-90, 90)) & (df["longitude"].between(-180, 180))]

print(f"Total records loaded: {len(df)}")

# Generate heatmap
map_center = [df["latitude"].mean(), df["longitude"].mean()]
heat_map = folium.Map(location=map_center, zoom_start=4)
heat_data = df[["latitude", "longitude"]].values.tolist()

print("Generating heatmap...")
HeatMap(heat_data, radius=10, blur=15, max_zoom=6).add_to(heat_map)

output_file = "geo_heatmap_json.html"
heat_map.save(output_file)

print(f"Heatmap saved as '{output_file}'")

Extracting zip file...
Loading JSON files...
Cleaning Data...
Total records loaded: 134445
Generating heatmap...
Heatmap saved as 'geo_heatmap_json.html'


In [4]:
import zipfile
import os
import json
import random
import pandas as pd

zip_path = "/content/archive (1).zip"

extract_path = "extracted_jsons"

with zipfile.ZipFile(zip_path, 'r') as zip_ref:
    zip_ref.extractall(extract_path)

print(f"Extracted files: {os.listdir(extract_path)}")

all_texts = []

random.seed(42)

for filename in os.listdir(extract_path):
    if filename.endswith(".json"):
        file_path = os.path.join(extract_path, filename)
        with open(file_path, 'r', encoding='utf-8') as f:
            data = json.load(f)
            for item in data:
                text = item.get('text')
                if text:
                    all_texts.append(text)

print(f"Total texts loaded: {len(all_texts)}")

sentiments = ['Positive', 'Negative', 'Neutral']
sentiment_data = []

for i, text in enumerate(all_texts):
    sentiment = random.choice(sentiments)
    sentiment_data.append({
        'ID': i + 1,
        'Text': text,
        'Sentiment': sentiment
    })

df = pd.DataFrame(sentiment_data)
output_excel = "sentiment_analysis_output.xlsx"
df.to_excel(output_excel, index=False)

print(f"Sentiment Excel saved as: {output_excel}")


Extracted files: ['dutch_tweets_chunk7.json', 'dutch_tweets_chunk2.json', 'dutch_tweets_chunk1.json', 'dutch_tweets_chunk0.json', 'dutch_tweets_chunk6.json', 'dutch_tweets_chunk8.json', 'dutch_tweets_chunk3.json', 'dutch_tweets_chunk5.json', 'dutch_tweets_chunk9.json', 'dutch_tweets_chunk4.json']
Total texts loaded: 0
Sentiment Excel saved as: sentiment_analysis_output.xlsx
