Python script to generate a synthetic (randomly generated) dataset about the spread of misinformation on social media.

In [3]:
import pandas as pd
import numpy as np

# Function to generate dataset
def generate_misinformation_dataset(platforms, countries, age_range, gender_distribution, sample_size_per_country_platform, gender_probabilities=None):
    """Build a synthetic, reproducible dataset of social-media users.

    One row is generated per (platform, country, sample) combination, with
    every random field drawn from a generator seeded with 42, so repeated
    calls with the same arguments return identical frames.

    Parameters
    ----------
    platforms : sequence of str
        Platform names; each is paired with every country.
    countries : sequence of str
        Country names.
    age_range : sequence of two ints
        ``(min_age, max_age)``; both bounds are inclusive.
    gender_distribution : sequence of str
        Gender labels to sample from.
    sample_size_per_country_platform : int
        Number of rows to generate for each platform/country pair.
    gender_probabilities : sequence of float, optional
        Sampling weight for each label in ``gender_distribution`` (must have
        the same length and sum to 1). When omitted, a three-label
        distribution uses the weights ``[0.5, 0.4, 0.1]`` and any other
        length is sampled uniformly.

    Returns
    -------
    pandas.DataFrame
        Columns: User_ID, Platform, Country, Age, Gender, Post_Type,
        Engagements, Misinformation_Spread (0 = No, 1 = Yes). The columns are
        present even when no rows are generated.
    """
    columns = [
        "User_ID",
        "Platform",
        "Country",
        "Age",
        "Gender",
        "Post_Type",
        "Engagements",
        "Misinformation_Spread",
    ]

    # A private legacy generator yields the same stream as calling
    # np.random.seed(42) followed by np.random.* functions, but does not
    # reseed the process-wide NumPy generator as a hidden side effect.
    rng = np.random.RandomState(42)

    # The fixed weights only make sense for exactly three labels; for any
    # other length fall back to uniform sampling (p=None) instead of raising
    # a size-mismatch ValueError.
    if gender_probabilities is None and len(gender_distribution) == 3:
        gender_probabilities = [0.5, 0.4, 0.1]

    age_low = age_range[0]
    age_high_exclusive = age_range[1] + 1  # randint's upper bound is exclusive

    data = []
    for platform in platforms:
        for country in countries:
            for _ in range(sample_size_per_country_platform):
                # The order of the draws below determines the generated
                # values; keep it stable so existing outputs are unchanged.
                data.append({
                    "User_ID": f"User_{len(data) + 1}",
                    "Platform": platform,
                    "Country": country,
                    "Age": rng.randint(age_low, age_high_exclusive),
                    "Gender": rng.choice(gender_distribution, p=gender_probabilities),
                    "Post_Type": rng.choice(["Text", "Image", "Video", "Mixed"]),
                    "Engagements": rng.randint(10, 500),
                    "Misinformation_Spread": rng.choice(
                        [0, 1],  # 0 = No, 1 = Yes
                        p=[0.5, 0.5]
                    )
                })

    # Passing the column list keeps the schema intact for an empty result.
    return pd.DataFrame(data, columns=columns)

# User inputs
# Strip whitespace around each comma-separated entry and drop empty entries,
# so typing "Facebook, Twitter" yields "Twitter" rather than " Twitter".
platforms = [
    entry.strip()
    for entry in input("Enter social platforms (comma-separated, e.g., Facebook, Twitter): ").split(",")
    if entry.strip()
]
countries = [
    entry.strip()
    for entry in input("Enter countries (comma-separated, e.g., USA, UK, Canada): ").split(",")
    if entry.strip()
]
age_range = list(map(int, input("Enter age range (e.g., 18 65): ").split()))
# Fail early with a clear message instead of an IndexError or NumPy error
# raised from inside the generator.
if len(age_range) < 2 or age_range[0] > age_range[1]:
    raise ValueError("Age range must be two integers 'min max' with min <= max, e.g. '18 65'.")
gender_distribution = ["Male", "Female", "Other"]
sample_size_per_country_platform = int(input("Enter sample size per country-platform pair: "))

# Generate dataset
dataset = generate_misinformation_dataset(platforms, countries, age_range, gender_distribution, sample_size_per_country_platform)

# Save to CSV
dataset.to_csv("data.csv", index=False)
print("Dataset saved as 'data.csv'")
print(dataset.head())


Dataset saved as 'data.csv'
  User_ID  Platform Country  Age  Gender Post_Type  Engagements  \
0  User_1  Facebook   China   56  Female     Video          116   
1  User_2  Facebook   China   38    Male     Video          224   
2  User_3  Facebook   China   41    Male     Mixed          161   
3  User_4  Facebook   China   19  Female     Image          303   
4  User_5  Facebook   China   38  Female     Image           31   

   Misinformation_Spread  
0                      1  
1                      0  
2                      1  
3                      0  
4                      0  
