In [8]:
import pandas as pd
import numpy as np

# Load train and test datasets
train_data_path = 'train.csv'
test_data_path = 'test.csv'

# Load the training dataset
train = pd.read_csv(train_data_path)

# Load the testing dataset
test = pd.read_csv(test_data_path)

# Define weights for each attribute
weights = {
  'profile pic':0.8,
  'nums/length username':0.4,
  'fullname words':0.8,
  'nums/length fullname':0.8,
  'name==username':0.6,
  'description length':0.2,
  'external URL':0.05,
  'private':0.05,
  '#posts':0.8,
  '#followers':0.8,
  '#follows':0.1,
  'fake':0.9
}


# Calculate Trustworthiness Score for each user
def calculate_trustworthiness_score(row):
    trustworthiness_score = sum(row[attr] * weights[attr] for attr in weights.keys())
    return trustworthiness_score

# Calculate Trustworthiness Score for each user in train dataset
train['trustworthiness_score'] = train.apply(calculate_trustworthiness_score, axis=1)

# Calculate Trustworthiness Score for each user in test dataset
test['trustworthiness_score'] = test.apply(calculate_trustworthiness_score, axis=1)

# Print mean Trustworthiness Score for train and test datasets
print("Mean Trustworthiness Score (Train):", train['trustworthiness_score'].mean())
print("Mean Trustworthiness Score (Test):", test['trustworthiness_score'].mean())


Mean Trustworthiness Score (Train): 68389.46272569445
Mean Trustworthiness Score (Test): 39827.90445


In [9]:
import pandas as pd

# Define weights for each attribute
weights = {
    'profile pic':0.8,
  'nums/length username':0.4,
  'fullname words':0.8,
  'nums/length fullname':0.8,
  'name==username':0.6,
  'description length':0.2,
  'external URL':0.05,
  'private':0.05,
  '#posts':0.8,
  '#followers':0.8,
  '#follows':0.1,
  'fake':0.9# Importance of number of accounts followed
}

# Load your dataset
data = pd.read_csv("train.csv")

# Normalize the features (assuming Min-Max normalization)
normalized_data = (data - data.min()) / (data.max() - data.min())

# Apply the weights to each feature
weighted_data = normalized_data.copy()
for feature in weights:
    weighted_data[feature] *= weights[feature]

# Calculate trustworthiness score for each instance
trustworthiness_scores = weighted_data.sum(axis=1)

# Adjust trustworthiness score based on the 'fake' attribute
trustworthiness_scores = trustworthiness_scores - (data['fake'] * 10)

# Scale trustworthiness scores to range from 0 to 100
scaled_trustworthiness_scores = (trustworthiness_scores * 100).astype(int)

# Clip scores to be within the range of 0 to 100
scaled_trustworthiness_scores = scaled_trustworthiness_scores.clip(0, 100)

# Now you have trustworthiness scores ranging from 0 to 100 as whole numbers
print(scaled_trustworthiness_scores[:35])


0     100
1     100
2     100
3     100
4      98
5     100
6     100
7      93
8      93
9     100
10    100
11    100
12    100
13    100
14    100
15    100
16    100
17    100
18    100
19    100
20    100
21    100
22    100
23     94
24    100
25    100
26    100
27    100
28    100
29    100
30    100
31     92
32    100
33    100
34    100
dtype: int32


In [44]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression

# Load your dataset
data = pd.read_csv("train.csv")

# Define weights for each attribute
weights = {
    'profile pic': 0.8,
    'nums/length username': 0.4,
    'fullname words': 0.8,
    'nums/length fullname': 0.8,
    'name==username': 0.6,
    'description length': 0.2,
    'external URL': 0.05,
    'private': 0.05,
    '#posts': 0.8,
    '#followers': 0.8,
    '#follows': 0.1,
    'fake': 0.9
}

# Normalize the features (assuming Min-Max normalization)
normalized_data = (data - data.min()) / (data.max() - data.min())

# Apply the weights to each feature
weighted_data = normalized_data.copy()
for feature in weights:
    weighted_data[feature] *= weights[feature]

# Calculate trustworthiness score for each instance
trustworthiness_scores = weighted_data.sum(axis=1)

# Adjust trustworthiness score based on the 'fake' attribute
trustworthiness_scores = trustworthiness_scores - (data['fake'] * 10)

# Scale trustworthiness scores to range from 0 to 100
scaled_trustworthiness_scores = (trustworthiness_scores * 100).astype(int)

# Clip scores to be within the range of 0 to 100
scaled_trustworthiness_scores = scaled_trustworthiness_scores.clip(0, 100)

# Add the scaled trustworthiness scores column to the dataset
data['scaled_trustworthiness_scores'] = scaled_trustworthiness_scores

# Print the first 35 rows of the dataset with the new column
print(data.head(35))


    profile pic  nums/length username  fullname words  nums/length fullname  \
0             1                  0.27               0                   0.0   
1             1                  0.00               2                   0.0   
2             1                  0.10               2                   0.0   
3             1                  0.00               1                   0.0   
4             1                  0.00               2                   0.0   
5             1                  0.00               4                   0.0   
6             1                  0.00               2                   0.0   
7             1                  0.00               2                   0.0   
8             1                  0.00               0                   0.0   
9             1                  0.00               2                   0.0   
10            1                  0.00               2                   0.0   
11            1                  0.00               

In [45]:
print(data.head())

   profile pic  nums/length username  fullname words  nums/length fullname  \
0            1                  0.27               0                   0.0   
1            1                  0.00               2                   0.0   
2            1                  0.10               2                   0.0   
3            1                  0.00               1                   0.0   
4            1                  0.00               2                   0.0   

   name==username  description length  external URL  private  #posts  \
0               0                  53             0        0      32   
1               0                  44             0        0     286   
2               0                   0             0        1      13   
3               0                  82             0        0     679   
4               0                   0             0        1       6   

   #followers  #follows  fake  scaled_trustworthiness_scores  
0        1000       955     0      

In [46]:
import tensorflow as tf
X_train = data.drop(columns = ['scaled_trustworthiness_scores'])
X_test = data.drop(columns = ['scaled_trustworthiness_scores'])

print(X_train,X_test)

# Training and testing dataset (Outputs)
y_train = data['scaled_trustworthiness_scores']
y_test = data['scaled_trustworthiness_scores']

print(y_train,y_test)

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Scale the data before training the model
scaler = StandardScaler()
X_train = scaler.fit_transform(X_train)
X_test = scaler.transform(X_test)

# Initialize the Random Forest Regressor model
model = RandomForestRegressor(random_state=42)

# Train the model
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Calculate the Mean Squared Error (MSE) of the predictions
mse = mean_squared_error(y_test, y_pred)
print("Mean Squared Error:", mse)


     profile pic  nums/length username  fullname words  nums/length fullname  \
0              1                  0.27               0                  0.00   
1              1                  0.00               2                  0.00   
2              1                  0.10               2                  0.00   
3              1                  0.00               1                  0.00   
4              1                  0.00               2                  0.00   
..           ...                   ...             ...                   ...   
571            1                  0.55               1                  0.44   
572            1                  0.38               1                  0.33   
573            1                  0.57               2                  0.00   
574            1                  0.57               1                  0.00   
575            1                  0.27               1                  0.00   

     name==username  description length

In [47]:
print(data)

     profile pic  nums/length username  fullname words  nums/length fullname  \
0              1                  0.27               0                  0.00   
1              1                  0.00               2                  0.00   
2              1                  0.10               2                  0.00   
3              1                  0.00               1                  0.00   
4              1                  0.00               2                  0.00   
..           ...                   ...             ...                   ...   
571            1                  0.55               1                  0.44   
572            1                  0.38               1                  0.33   
573            1                  0.57               2                  0.00   
574            1                  0.57               1                  0.00   
575            1                  0.27               1                  0.00   

     name==username  description length

In [52]:
import tensorflow.keras
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout

# Define the model
model = Sequential()
model.add(Dense(50, input_dim=12, activation='relu'))  # Adjust input_dim to 12
model.add(Dense(150, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(150, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(25, activation='relu'))
model.add(Dropout(0.3))
model.add(Dense(2, activation='softmax'))

# Compile the model
model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])

# Print model summary
model.summary()


Model: "sequential_4"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense_20 (Dense)            (None, 50)                650       
                                                                 
 dense_21 (Dense)            (None, 150)               7650      
                                                                 
 dropout_12 (Dropout)        (None, 150)               0         
                                                                 
 dense_22 (Dense)            (None, 150)               22650     
                                                                 
 dropout_13 (Dropout)        (None, 150)               0         
                                                                 
 dense_23 (Dense)            (None, 25)                3775      
                                                                 
 dropout_14 (Dropout)        (None, 25)               

In [63]:
# 1. Prepare your input data
input_data = pd.DataFrame({
    'profile pic': [1],
    'nums/length username': [12],
    'fullname words': [15],
    'nums/length fullname': [12],
    'name==username': [0],
    'description length': [53],
    'external URL': [0],
    'private': [0],
    '#posts': [50],
    '#followers': [1000],
    '#follows': [95],
    'fake': [0]
})

# 2. Scale the input data using the same scaler
scaled_input_data = scaler.transform(input_data)

# 3. Use the trained model to predict the trustworthiness score
trustworthiness_score = model.predict(scaled_input_data)

print("Predicted Trustworthiness Score:", trustworthiness_score[0][0]*100)


Predicted Trustworthiness Score: 81.50360584259033


In [64]:
input_data = pd.DataFrame({
    'profile pic': [0],
    'nums/length username': [6],
    'fullname words': [5],
    'nums/length fullname': [4],
    'name==username': [0],
    'description length': [10],
    'external URL': [0],
    'private': [0],
    '#posts': [0],
    '#followers': [1],
    '#follows': [95],
    'fake': [0]
})

# 2. Scale the input data using the same scaler
scaled_input_data = scaler.transform(input_data)

# 3. Use the trained model to predict the trustworthiness score
trustworthiness_score = model.predict(scaled_input_data)

print("Predicted Trustworthiness Score:", trustworthiness_score[0][0]*100)


Predicted Trustworthiness Score: 66.95091128349304
