In [1]:
import pandas as pd
import numpy as np
import random

# Pin both random streams so every run yields the same dataset:
# the stdlib `random` module and NumPy's global RNG are seeded separately.
random.seed(42)
np.random.seed(42)

# How many synthetic websites (rows) to generate
num_websites = 100

# Generate random names of websites
def generate_website_name():
    """Return a random website name such as ``'abcde.com'``.

    The name is 5 to 10 random lowercase ASCII letters followed by one
    domain extension picked from a fixed list. All randomness comes from
    the stdlib ``random`` module, so seeding it makes the result repeatable.

    Returns:
        str: the random letters concatenated with the chosen extension.
    """
    alphabet = "abcdefghijklmnopqrstuvwxyz"
    extensions = ['.com', '.gov', '.edu', '.org', '.net', '.in', '.de']

    # Draw the length first, then one letter per position.
    picked_letters = []
    for _ in range(random.randint(5, 10)):
        picked_letters.append(random.choice(alphabet))

    # The extension is drawn last, after all letters have been chosen.
    suffix = random.choice(extensions)

    return ''.join(picked_letters) + suffix

# One random name per website (drawn from the stdlib `random` stream)
website_names = list(generate_website_name() for _ in range(num_websites))

# Per-website feature columns, drawn from NumPy's global RNG.
# The statement order is kept fixed because each draw advances the shared
# RNG state; reordering would change the generated values.
# Integer features: `high` is exclusive in np.random.randint.
# Categorical features: each entry is 'yes' or 'no'.
multilingual = np.random.randint(low=0, high=6, size=num_websites)
attachments = np.random.randint(low=0, high=50, size=num_websites)
payment = np.random.choice(['yes', 'no'], size=num_websites)
formulare = np.random.randint(low=0, high=20, size=num_websites)
contact_info = np.random.choice(['yes', 'no'], size=num_websites)
chatbot = np.random.choice(['yes', 'no'], size=num_websites)
socialmedia = np.random.randint(low=0, high=8, size=num_websites)

# Compute digital_score as a deterministic weighted sum of four binary
# indicators (the weights add up to 1.0):
#   0.4 if payment == 'yes'
#   0.3 if formulare > 10
#   0.2 if chatbot == 'yes'
#   0.1 if attachments > 25
# multilingual, contact_info and socialmedia do not enter the score.
#
# The sum is rounded to one decimal because binary floating point cannot
# represent these weights exactly: e.g. 0.4 + 0.2 evaluates to
# 0.6000000000000001, which would otherwise be written verbatim when the
# frame is exported.
digital_score = np.round(
    0.4 * (payment == 'yes').astype(float) +
    0.3 * (formulare > 10).astype(float) +
    0.2 * (chatbot == 'yes').astype(float) +
    0.1 * (attachments > 25).astype(float),
    1,
)

# Pool of European countries used for the synthetic `location` column.
# The list order is kept as-is: it determines which country a given random
# draw maps to.
european_countries = [
    'Switzerland', 'Belgium', 'Bulgaria',
    'Croatia', 'Cyprus', 'Czech Republic',
    'Denmark', 'Estonia', 'Finland',
    'France', 'Germany', 'Greece',
    'Hungary', 'Ireland', 'Italy',
    'Latvia', 'Lithuania', 'Luxembourg',
    'Malta', 'Netherlands', 'Poland',
    'Portugal', 'Romania', 'Slovakia',
    'Slovenia', 'Spain', 'Sweden',
]

# One country per website, sampled with replacement (countries can repeat)
# from the stdlib `random` stream.
locations = random.choices(population=european_countries, k=num_websites)

# Assemble the generated columns into a single DataFrame.
# Each keyword below becomes a column name; columns appear in this order.
data = pd.DataFrame(data=dict(
    Website=website_names,
    multilingual=multilingual,
    attachments=attachments,
    payment=payment,
    formulare=formulare,
    contact_info=contact_info,
    chatbot=chatbot,
    socialmedia=socialmedia,
    digital_score=digital_score,
    location=locations,
))

# Preview the first ten rows as the cell output
data.head(n=10)


Unnamed: 0,Website,multilingual,attachments,payment,formulare,contact_info,chatbot,socialmedia,digital_score,location
0,daxihhexdv.in,3,14,no,17,yes,no,1,0.3,Spain
1,csnbacghq.net,4,44,yes,9,no,no,2,0.5,Bulgaria
2,rgwuw.net,2,0,yes,2,no,yes,3,0.6,Netherlands
3,hosizayz.gov,4,24,yes,6,yes,yes,4,0.6,Ireland
4,nkiegykdcm.com,4,6,no,15,yes,no,4,0.3,Poland
5,ltizbxo.net,1,8,no,15,no,no,3,0.3,Malta
6,mcrju.net,2,23,no,19,yes,no,6,0.3,Luxembourg
7,sgwcbvh.de,2,0,yes,16,no,no,5,0.7,Ireland
8,chdmiou.de,2,43,yes,1,yes,no,1,0.5,Portugal
9,fllgviw.in,4,7,no,0,no,yes,4,0.2,Bulgaria


In [2]:
# Write the generated dataset to a CSV file in the current working directory.
# index=True is the pandas default, spelled out here: the row index is
# written as an extra, unnamed first column.
data.to_csv(path_or_buf="locations_100.csv", index=True)