### Event Analysis 

In [46]:
import math
import os
from itertools import combinations
from pathlib import Path

import pandas as pd

In [47]:
### Load Data
# Folder holding the exported CSV files. Set the PLURAL_DATA_DIR environment
# variable to read from another location; the fallback is the folder this
# notebook was originally written against.
DATA_DIR = Path(os.environ.get('PLURAL_DATA_DIR', r'C:\Users\beneb\Desktop\plural_data'))

voteData = pd.read_csv(DATA_DIR / 'votes.csv')
groupData = pd.read_csv(DATA_DIR / 'users_to_groups.csv')
forumQuestions = pd.read_csv(DATA_DIR / 'forum_questions.csv')
groupCategories = pd.read_csv(DATA_DIR / 'group_categories.csv')

# print("voteData:", len(voteData))
# print("groupData:", len(groupData))
# print("forumQuestions:", len(forumQuestions))
# print("groupCategories:", len(groupCategories))

In [48]:
def getLatestVoteByUserAndOptionId(df, option_id):
    """
    Builds a lookup of each user's most recent vote count for one option.
    :param: df (pandas.DataFrame): Vote records; the columns 'option_id', 'user_id', 'updated_at' and 'num_of_votes' are read.
    :param: option_id (str): The ID of the option whose votes should be collected.
    :returns (dict): Maps each user_id to the num_of_votes of that user's row with the greatest updated_at.
    """

    # Work on a copy so the helper 'rank' column never shows up in the caller's frame
    option_votes = df[df['option_id'] == option_id].copy()

    # Within each user, rank 1 is given to the greatest updated_at value
    recency = option_votes.groupby('user_id')['updated_at'].rank(method='first', ascending=False)
    option_votes['rank'] = recency

    # Keep only the top-ranked row per user and turn it into user_id -> num_of_votes
    is_latest = option_votes['rank'] == 1
    return option_votes[is_latest].set_index('user_id')['num_of_votes'].to_dict()

In [49]:
# Latest vote count per user for a single, hard-coded option ID.
voteDict = getLatestVoteByUserAndOptionId(voteData, '4ad65252-e651-460c-af6b-289769a3964b')
# print(voteDict)

In [50]:
def getGroupsByUserAndOptionId(df, numOfVotesDictionary, groupCategories):
    """
    Collects, per group, the users that appear in the vote dictionary.
    :param: df (pandas.DataFrame): Group assignments; the columns 'user_id', 'group_id' and 'group_category_id' are read.
    :param: numOfVotesDictionary (dict): Keyed by user ID; only the keys are used, to decide which users to keep.
    :param: groupCategories (list of str): Group category IDs to keep.
    :returns (dict): Maps each remaining group_id to the list of its user_ids.
    """

    # A row survives only if its user is in the dictionary AND its category is requested
    is_voter = df['user_id'].isin(numOfVotesDictionary.keys())
    in_category = df['group_category_id'].isin(groupCategories)

    # One row per group, with the user IDs of that group gathered into a list
    members_by_group = (
        df[is_voter & in_category]
        .groupby('group_id')['user_id']
        .agg(list)
        .reset_index()
    )

    return {
        group_id: user_ids
        for group_id, user_ids in zip(members_by_group['group_id'], members_by_group['user_id'])
    }


In [51]:
# Group membership lists restricted to the users in voteDict and to two hard-coded group category IDs.
group_dict = getGroupsByUserAndOptionId(groupData, voteDict, ['0419db52-cda8-4cf2-adb2-a881234ea1f8', 'b1862f74-458c-4eb8-9445-1ccf9ccd1712'])

# Print the resulting dictionary
# print(group_dict)

In [52]:
def create_group_memberships(groups):
    """
    Inverts a group -> members mapping into a member -> groups mapping.
    :param: groups (dict): Maps each group ID to an iterable of its member IDs.
    :returns (dict): Maps each member ID to the list of group IDs it appears in, in the order the groups are iterated.
    """
    groups_by_member = {}
    for group_id, member_ids in groups.items():
        for member_id in member_ids:
            # First sighting of this member: start its list of groups
            if member_id not in groups_by_member:
                groups_by_member[member_id] = []
            groups_by_member[member_id].append(group_id)
    return groups_by_member

In [53]:
# Per-user list of group IDs, derived by inverting group_dict.
memberships = create_group_memberships(group_dict)
# print(memberships)

In [54]:
def ConnectionOrientedClusterMatch(groups, contributions):
    """
    Computes the connection-oriented cluster match funding amount.

    The result is the sum of all contributions plus, for every unordered pair of
    groups (g, h), the term
        2 * sqrt(sum_{i in g} K(i, h) / m_i) * sqrt(sum_{j in h} K(j, g) / m_j)
    where m_i is the number of groups user i belongs to and K(i, h) is
    sqrt(contribution of i) if i shares at least one group with some member of h
    (being a member of h itself counts), and the raw contribution otherwise.

    :param: groups (dict): Maps each group ID to the list of user IDs in that group.
    :param: contributions (dict): Maps each user ID to that user's contribution (number of votes).
        Values are expected to be non-negative, since square roots are taken of them.
    :returns (float or int): The funding amount; equals the plain sum of contributions when there are fewer than two groups.
    :raises KeyError: If there are at least two groups and a group member has no entry in contributions.
    """
    # groups_of[user] is the set of group IDs the user belongs to; its size is the
    # user's membership count. Two users are connected when their sets intersect.
    groups_of = {}
    for group_id, members in groups.items():
        for member in members:
            groups_of.setdefault(member, set()).add(group_id)

    # reachable[h] is every group that at least one member of h belongs to. A user is
    # connected to some member of h exactly when the user's own groups intersect this
    # set, which avoids building a full user-by-user matrix.
    reachable = {
        group_id: set().union(*(groups_of[member] for member in members))
        for group_id, members in groups.items()
    }

    # build up the funding amount. First, add in everyone's contributions
    funding_amount = sum(contributions.values())

    def K(user, other_group):
        # Attenuate to the square root when the user is connected to the other group
        if not groups_of[user].isdisjoint(reachable[other_group]):
            return math.sqrt(contributions[user])
        return contributions[user]

    def weighted_total(group, other_group):
        # Each member's (possibly attenuated) contribution, split evenly across the member's groups
        return sum(K(user, other_group) / len(groups_of[user]) for user in groups[group])

    funding_amount += sum(
        2 * math.sqrt(weighted_total(group0, group1))
        * math.sqrt(weighted_total(group1, group0))
        for group0, group1 in combinations(groups.keys(), 2)
    )

    return funding_amount

In [55]:
# Test data
# NOTE(review): `groups` and `contributions` defined here are not passed to the
# call further down, which runs on `group_dict` and `voteDict` instead.
groups = {
    'group0': ['user0', 'user1'],
    'group1': ['user1', 'user2', 'user3'],
    'group2': ['user0', 'user2'],
}

contributions = {
    'user0': 1,
    'user1': 2,
    'user2': 3,
    'user3': 4,
}

# Funding amount for the loaded vote/group data, followed by its square root.
result_2 = ConnectionOrientedClusterMatch(group_dict, voteDict)
result_sqrt_2 = math.sqrt(result_2)
# print(result_sqrt_2)
