
# Social Network Ads Analysis

This Jupyter notebook contains the code for analyzing the Social Network Ads dataset. The analysis covers network construction, network analysis, comparison with standard graph models (Erdős–Rényi, Barabási–Albert, and Watts–Strogatz), and the analysis of a research question on purchasing behavior.

## Steps
1. Data Loading
2. Network Construction
3. Network Analysis
4. Comparison with Standard Graph Models
5. Research Question Analysis


In [None]:

import pandas as pd
import networkx as nx
import matplotlib.pyplot as plt

# Read the Social Network Ads CSV; the path is resolved relative to the
# notebook's working directory.
file_path = 'Social_Network_Ads.csv'
social_network_ads_df = pd.read_csv(filepath_or_buffer=file_path)

# Preview the leading rows (5 is the pandas default) to inspect the columns
social_network_ads_df.head(n=5)


In [None]:

# Similarity criteria: two users are linked when BOTH their age difference
# and their salary difference are within these (inclusive) thresholds.
age_threshold = 5
salary_threshold = 10000

# Create a graph
G = nx.Graph()

# Add one node per user, keyed by the DataFrame index label, and carry the
# attributes used later in the analysis.
for index, row in social_network_ads_df.iterrows():
    G.add_node(index, age=row['Age'], salary=row['EstimatedSalary'], purchased=row['Purchased'])

# Pull the compared columns out of the DataFrame once. Scalar `.loc` lookups
# inside the O(n^2) pair loop are very slow, and pairing each value with its
# index label keeps edge endpoints identical to the node keys added above even
# when the index is not the default 0..n-1 range.
user_records = list(zip(
    social_network_ads_df.index.tolist(),
    social_network_ads_df['Age'].tolist(),
    social_network_ads_df['EstimatedSalary'].tolist(),
))

# Add edges based on similarity criteria (each unordered pair is visited once)
for position, (node_a, age_a, salary_a) in enumerate(user_records):
    for node_b, age_b, salary_b in user_records[position + 1:]:
        age_diff = abs(age_a - age_b)
        salary_diff = abs(salary_a - salary_b)
        if age_diff <= age_threshold and salary_diff <= salary_threshold:
            G.add_edge(node_a, node_b)

# Number of nodes and edges
num_nodes = G.number_of_nodes()
num_edges = G.number_of_edges()

# Draw the graph
plt.figure(figsize=(12, 12))
nx.draw(G, with_labels=True, node_color='lightblue', edge_color='gray', node_size=500, font_size=10)
plt.title("Social Network Ads Graph")
plt.show()

num_nodes, num_edges


In [None]:

# Degree Distribution Analysis: how many nodes have each degree value
degree_sequence = sorted((d for _, d in G.degree()), reverse=True)
degree_count = pd.Series(degree_sequence).value_counts().sort_index()

# Connected Components Analysis
connected_components = list(nx.connected_components(G))
num_connected_components = len(connected_components)
largest_component = max(connected_components, key=len)
largest_component_size = len(largest_component)

# Path Analysis
# nx.average_shortest_path_length raises NetworkXError when the graph is not
# connected, which a threshold-based similarity graph can easily be. In that
# case the value is computed on the largest connected component instead.
if num_connected_components == 1:
    average_shortest_path_length = nx.average_shortest_path_length(G)
else:
    average_shortest_path_length = nx.average_shortest_path_length(
        G.subgraph(largest_component)
    )

# Clustering Coefficient Analysis
average_clustering_coefficient = nx.average_clustering(G)

# Centrality Analysis (each call returns a dict keyed by node)
degree_centrality = nx.degree_centrality(G)
betweenness_centrality = nx.betweenness_centrality(G)
closeness_centrality = nx.closeness_centrality(G)

# Compile results into a dictionary; the comparison cell reads the
# "Average Shortest Path Length" and "Average Clustering Coefficient" keys.
results = {
    "Degree Count": degree_count,
    "Number of Connected Components": num_connected_components,
    "Largest Component Size": largest_component_size,
    "Average Shortest Path Length": average_shortest_path_length,
    "Average Clustering Coefficient": average_clustering_coefficient,
    "Degree Centrality": degree_centrality,
    "Betweenness Centrality": betweenness_centrality,
    "Closeness Centrality": closeness_centrality
}

results


In [None]:

# Create ER, BA, and WS graphs for comparison

# Import necessary libraries
from networkx.generators.random_graphs import erdos_renyi_graph
from networkx.generators.random_graphs import barabasi_albert_graph
from networkx.generators.random_graphs import watts_strogatz_graph

# Parameters: size the reference models from the constructed graph itself
# rather than from hard-coded numbers, so the comparison stays valid when the
# dataset or the similarity thresholds change.
num_nodes = G.number_of_nodes()
num_edges = G.number_of_edges()

# ER graph (Erdős–Rényi model)
# p is the observed edge density, so the expected edge count equals num_edges.
p = num_edges / (num_nodes * (num_nodes - 1) / 2)
er_graph = erdos_renyi_graph(num_nodes, p)

# BA graph (Barabási–Albert model)
# Each new node attaches with m edges; m must be at least 1, so guard against
# sparse graphs where num_edges < num_nodes.
m = max(1, num_edges // num_nodes)
ba_graph = barabasi_albert_graph(num_nodes, m)

# WS graph (Watts–Strogatz model)
# NOTE(review): k = 6 yields about 3 * num_nodes edges, so unlike ER and BA
# this model is not density-matched to the original graph — confirm intended.
k = 6  # Each node is connected to k nearest neighbors in ring topology
beta = 0.1  # Rewiring probability
ws_graph = watts_strogatz_graph(num_nodes, k, beta)

# Functions to compute graph statistics
def compute_graph_statistics(G):
    """Return path, clustering and centrality statistics for a graph.

    Args:
        G: A NetworkX graph.

    Returns:
        A dict with the keys "Average Shortest Path Length",
        "Average Clustering Coefficient", "Degree Centrality",
        "Betweenness Centrality" and "Closeness Centrality". The three
        centrality entries are dicts keyed by node.

    If an undirected graph is not connected, the average shortest path length
    is computed on its largest connected component instead of raising.
    """
    try:
        average_path_length = nx.average_shortest_path_length(G)
    except nx.NetworkXError:
        # Raised for a disconnected graph (possible for random ER/WS draws).
        # The component fallback is only defined for undirected graphs.
        if G.is_directed():
            raise
        largest_component = max(nx.connected_components(G), key=len)
        average_path_length = nx.average_shortest_path_length(
            G.subgraph(largest_component)
        )

    stats = {
        "Average Shortest Path Length": average_path_length,
        "Average Clustering Coefficient": nx.average_clustering(G),
        "Degree Centrality": nx.degree_centrality(G),
        "Betweenness Centrality": nx.betweenness_centrality(G),
        "Closeness Centrality": nx.closeness_centrality(G)
    }
    return stats

# Run the statistics helper on the three reference models, in ER, BA, WS order
er_stats, ba_stats, ws_stats = map(
    compute_graph_statistics, (er_graph, ba_graph, ws_graph)
)

# Build the comparison table: one column per graph, one row per summary metric.
# Only the two scalar metrics are tabulated; the per-node centralities are not.
summary_metrics = ("Average Shortest Path Length", "Average Clustering Coefficient")
stats_by_model = {
    "Original": results,
    "ER": er_stats,
    "BA": ba_stats,
    "WS": ws_stats,
}
comparison_df = pd.DataFrame({
    model_label: {metric: model_stats[metric] for metric in summary_metrics}
    for model_label, model_stats in stats_by_model.items()
})

comparison_df


In [None]:

import numpy as np
import seaborn as sns

# Segment users based on age and salary ranges
age_bins = [18, 25, 35, 45, 55, 65]
salary_bins = [0, 30000, 50000, 70000, 100000, 150000]

# Add segmented columns to the dataframe.
# pd.cut builds right-closed intervals and by default leaves the first one
# open on the left, which would assign NaN to users aged exactly 18 (the
# lowest bin edge). include_lowest=True keeps those users in the first bin.
social_network_ads_df['AgeGroup'] = pd.cut(
    social_network_ads_df['Age'], bins=age_bins, include_lowest=True
)
social_network_ads_df['SalaryRange'] = pd.cut(
    social_network_ads_df['EstimatedSalary'], bins=salary_bins, include_lowest=True
)

# Calculate purchase rate within each age group and salary range.
# observed=False is spelled out so every bin is reported (empty bins as NaN)
# regardless of the pandas version's default for categorical groupers.
age_group_purchase_rate = social_network_ads_df.groupby('AgeGroup', observed=False)['Purchased'].mean()
salary_range_purchase_rate = social_network_ads_df.groupby('SalaryRange', observed=False)['Purchased'].mean()

# Centrality Measures, as Series indexed by node so they can be mapped onto
# the dataframe rows
degree_centrality = pd.Series(nx.degree_centrality(G))
betweenness_centrality = pd.Series(nx.betweenness_centrality(G))
closeness_centrality = pd.Series(nx.closeness_centrality(G))

# Add centrality measures to the dataframe (looked up by row index label)
social_network_ads_df['DegreeCentrality'] = social_network_ads_df.index.map(degree_centrality)
social_network_ads_df['BetweennessCentrality'] = social_network_ads_df.index.map(betweenness_centrality)
social_network_ads_df['ClosenessCentrality'] = social_network_ads_df.index.map(closeness_centrality)

# Analyze purchasing behavior based on centrality measures:
# mean centrality of each 'Purchased' group
centrality_purchase_rate = social_network_ads_df.groupby('Purchased')[['DegreeCentrality', 'BetweennessCentrality', 'ClosenessCentrality']].mean()

# Correlation Analysis
correlation_matrix = social_network_ads_df[['Age', 'EstimatedSalary', 'Purchased']].corr()

age_group_purchase_rate, salary_range_purchase_rate, centrality_purchase_rate, correlation_matrix
