# In [None]:
import csv
import json
from datetime import datetime, timedelta, timezone
import sys

# Load the pre-parsed messages (a JSON document) into memory
with open('pre-parsed_messages.json', mode='r', encoding='utf-8') as source_file:
    original_data = json.loads(source_file.read())

# Keywords looked up as lower-case substrings of each message text
keywords = [
    "путин",
    "сво",
    "пригожин",
    "вагнер",
    "украина",
]

# Reporting periods written as "dd.mm.yyyy-dd.mm.yyyy", listed in
# chronological order; each period maps to its 1-based position (the week
# number used as a key in the aggregated statistics).
periods_map = {
    period: week
    for week, period in enumerate(
        (
            "27.05.2023-03.06.2023",
            "04.06.2023-10.06.2023",
            "11.06.2023-17.06.2023",
            "18.06.2023-24.06.2023",
            "25.06.2023-01.07.2023",
            "02.07.2023-08.07.2023",
            "09.07.2023-15.07.2023",
            "16.07.2023-22.07.2023",
        ),
        start=1,
    )
}

# Accumulators filled while the messages are processed; each one is a
# separate dictionary keyed by channel URL.
aggregated_data, chanel_city_region_map, chanel_proximity = {}, {}, {}

# Give every channel a zeroed statistics record for each of the weeks 1..8


def _blank_sentiments():
    """Return a fresh sentiment tally with all three counters at zero."""
    return {label: {'count': 0} for label in ('positive', 'negative', 'neutral')}


def _blank_week():
    """Return a fresh, zeroed statistics record for one channel-week."""
    return {
        'message_count': 0,
        'emoticon_count': {},
        'sentiment_count': _blank_sentiments(),
        'keyword_count': {
            keyword: {
                'count': 0,
                'emoticon_count': {},
                'sentiment_count': _blank_sentiments(),
            }
            for keyword in keywords
        },
    }


for channel_url in {item['channel_url'] for item in original_data}:
    aggregated_data[channel_url] = {week: _blank_week() for week in range(1, 9)}
    
    

# Aggregate every message of the original JSON into per-channel, per-week
# statistics.
#
# Reads:  original_data, keywords, periods_map
# Writes: aggregated_data, chanel_city_region_map, chanel_proximity

# Substrings that classify a reaction emoticon. Matching is by substring, so
# an emoticon that carries extra code points (such as a variation selector)
# still matches its base symbol.
_POSITIVE_MARKERS = (
    "😎", "😍", "👌", "🔥", "🤩", "🥰", "💘", "💯", "🎉", "❤‍🔥",
    "🤗", "👍", "😁", "🤣", "🆒", "🏆", "🍾", "❤",
)
_NEGATIVE_MARKERS = (
    "😡", "🥴", "😭", "👎", "🤬", "💔", "😢", "🖕", "🤡", "😨",
    "💩", "🤮", "😱",
)

# Place names that determine a channel's proximity level.
_NEARBY_CITIES = ("Ростов", "Воронеж", "Елец")
# "Липетская область" is the spelling the script has always checked for; the
# official spelling "Липецкая область" is accepted as well so that correctly
# spelled data is not silently classified as level 3.
_NEARBY_REGIONS = (
    "Ростовская область",
    "Воронежская область",
    "Липетская область",
    "Липецкая область",
)


def _classify_sentiment(emoticon):
    """Return 'positive', 'negative' or 'neutral' for an emoticon string.

    Positive markers take precedence over negative ones; a string containing
    no known marker (including the empty string) is 'neutral'.
    """
    if any(marker in emoticon for marker in _POSITIVE_MARKERS):
        return "positive"
    if any(marker in emoticon for marker in _NEGATIVE_MARKERS):
        return "negative"
    return "neutral"


def _proximity_level(city, region):
    """Return 1 for a listed city, 2 for a listed region, otherwise 3.

    A missing (None) city or region is treated as an empty string instead of
    raising TypeError on the substring test.
    """
    city = city or ''
    region = region or ''
    if any(name in city for name in _NEARBY_CITIES):
        return 1
    if any(name in region for name in _NEARBY_REGIONS):
        return 2
    return 3


def _parse_period(period):
    """Turn 'dd.mm.yyyy-dd.mm.yyyy' into a half-open UTC interval (start, end).

    The end is midnight of the day after the last listed day, so the last
    day is fully included.
    """
    first_day, last_day = period.split('-')
    start = datetime.strptime(first_day, '%d.%m.%Y').replace(tzinfo=timezone.utc)
    end = datetime.strptime(last_day, '%d.%m.%Y') + timedelta(days=1)
    return start, end.replace(tzinfo=timezone.utc)


def _record_reactions(stats, reactions):
    """Add `reactions` to stats['sentiment_count'] and stats['emoticon_count'].

    `stats` is either a channel-week record or one of its per-keyword
    records; both carry the two sub-dictionaries updated here.
    """
    for reaction in reactions:
        # A reaction entry may lack an emoticon (None or absent). Treat it
        # like an empty string: it counts as neutral and is left out of the
        # per-emoticon totals, instead of crashing the substring tests.
        emoticon = (reaction.get('reaction') or {}).get('emoticon') or ''
        count = reaction.get('count', 0)

        # NOTE(review): sentiment counters grow by one per reaction entry,
        # whereas emoticon counters grow by `count` - confirm this asymmetry
        # is intended.
        stats['sentiment_count'][_classify_sentiment(emoticon)]['count'] += 1

        if emoticon:
            emoticon_totals = stats['emoticon_count']
            emoticon_totals[emoticon] = emoticon_totals.get(emoticon, 0) + count


# Parse the period boundaries once instead of once per message.
_WEEK_WINDOWS = [
    _parse_period(period) + (week,) for period, week in periods_map.items()
]

for item in original_data:
    channel_url = item['channel_url']
    chanel_city_region_map[channel_url] = {'city': item['city'], 'region': item['region']}
    chanel_proximity[channel_url] = {
        'proximity': _proximity_level(item['city'], item['region'])
    }

    # Timestamps carry a UTC offset (%z), so they compare correctly against
    # the timezone-aware period boundaries.
    message_timestamp = datetime.strptime(item['date'], '%Y-%m-%dT%H:%M:%S%z')
    week_number = next(
        (week for start, end, week in _WEEK_WINDOWS if start <= message_timestamp < end),
        None,
    )
    if week_number is None:
        # Messages outside every configured period are not counted.
        continue

    week_stats = aggregated_data[channel_url][week_number]
    week_stats['message_count'] += 1

    # Missing/null 'reactions' or 'results' both mean "no reactions".
    reactions = (item.get('reactions') or {}).get('results') or []
    _record_reactions(week_stats, reactions)

    # Count keyword occurrences in the message content (case-insensitive
    # substring match); a matching message also contributes its reactions to
    # the keyword's own tallies.
    message_content = (item.get('message') or '').lower()
    for keyword in keywords:
        if keyword in message_content:
            keyword_stats = week_stats['keyword_count'][keyword]
            keyword_stats['count'] += 1
            _record_reactions(keyword_stats, reactions)
# Flatten the per-channel aggregates into a list with one record per channel,
# attaching the channel's city, region and proximity level to its weekly data
result = [
    dict(
        channel_url=url,
        weeks=weekly_stats,
        city=chanel_city_region_map[url]['city'],
        region=chanel_city_region_map[url]['region'],
        proximity=chanel_proximity[url]['proximity'],
    )
    for url, weekly_stats in aggregated_data.items()
]
