In [10]:
import pandas as pd
import networkx as nx
# Step 1: Read trusted profiles and create a set of trusted usernames
trusted_profiles_df = pd.read_csv('d:/falconx/datingappdatascrap/dissimilarity/trusted_profiles_info.csv', index_col=0)
trusted_usernames = set(trusted_profiles_df['username'])
len(trusted_usernames)

970

In [11]:
# Step 2: Read user friends data and build a graph
user_friends_df = pd.read_csv('d:/falconx/datingappdatascrap//normal/user_friends.csv')
G = nx.from_pandas_edgelist(user_friends_df, 'username', 'friend_username')
G.number_of_nodes()

3386

In [33]:
# Step 3: Initialize trust scores
initial_trust_scores = {username: 1 if username in trusted_usernames else 0 for username in G.nodes}

# Step 4: Propagate trust scores using PageRank or a similar algorithm
# Here we use a damping factor of 0.85 for the PageRank algorithm
trust_scores = nx.pagerank(G, personalization=initial_trust_scores, alpha=0.9)
trust_scores_df = pd.DataFrame.from_dict(trust_scores, orient='index').reset_index()
trust_scores_df.columns = ['username', 'trust_score']
trust_scores_sorted = trust_scores_df.sort_values(by='trust_score', ascending=False)
trust_scores_sorted.to_csv('trust_scores.csv', index=False)

In [20]:
username_to_index_df = pd.read_csv('d:/falconx/datingappdatascrap/dissimilarity/processed_data.csv', index_col=0)
username_to_index_df = username_to_index_df.dropna(subset = ['username'])
username_to_index = pd.Series(username_to_index_df.index.values, index=username_to_index_df.username).to_dict()
len(username_to_index)

44643

In [None]:
# Map the normalized trust scores to profile indices
index_trust_scores = {username_to_index[username]: score for username, score in trust_scores.items() if username in username_to_index}


In [24]:
# Convert the trust scores to a DataFrame for further analysis
trust_scores_df = pd.DataFrame(list(index_trust_scores.items()), columns=['profile_index', 'trust_score'])

In [25]:
trust_scores_df.info

<bound method DataFrame.info of      profile_index  trust_score
0                0     0.007880
1             7756     0.000027
2            44482     0.000201
3            24809     0.004635
4             7589     0.000099
..             ...          ...
638          47024     0.000071
639          47189     0.000031
640          47283     0.000071
641          47302     0.000143
642          47335     0.000121

[643 rows x 2 columns]>

In [16]:
trust_scores_df.to_csv('trust_scores.csv')

In [None]:
from IPython.display import FileLink

# Replace 'path_to_output.csv' with your actual file path
output_file_path = '/mnt/data/trust_scores.csv'
display(FileLink(output_file_path))
