In [45]:
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn import metrics
import matplotlib.pyplot as plt

# Create a DataFrame with your data
data = {
    'Type of motivation': ['Race or ethnicity', 'Religion', 'Sexual orientation', 'Language', 'Disability', 'Sex and gender', 'Age', 'Other similar factor', 'Unknown motivation'],
    2014: [611, 429, 155, 12, 10, 22, 6, 10, 23],
    2015: [641, 469, 141, 18, 8, 12, 4, 27, 25],
    2016: [666, 460, 176, 13, 11, 24, 5, 44, 19],
    2017: [878, 842, 204, 23, 10, 32, 4, 35, 32],
    2018: [793, 657, 186, 14, 9, 54, 9, 48, 22],
    2019: [884, 613, 265, 25, 3, 56, 8, 73, 39],
    2020: [1619, 530, 258, 37, 8, 49, 5, 58, 39],
    2021: [1745, 886, 438, 33, 16, 60, 14, 101, 81],
    2022: [1950, 750, 491, 59, 15, 89, 7, 82, 117]
}

df = pd.DataFrame(data)

# One-hot encode the 'Type of motivation' column
df_encoded = pd.get_dummies(df, columns=['Type of motivation'], drop_first=True)

# Extract the years as the target variable
y = df_encoded.index.astype(int)

# Prepare the data for training and testing
X = df_encoded.drop(columns='Type of motivation_Unknown motivation').values  # Exclude 'Unknown motivation' from features

# Split the data into training and testing sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create a linear regression model
model = LinearRegression()

# Train the model
model.fit(X_train, y_train)

# Make predictions on the test set
y_pred = model.predict(X_test)

# Print the coefficients and intercept
print('Coefficients:', model.coef_)
print('Intercept:', model.intercept_)

# Evaluate the model
print('Mean Absolute Error:', metrics.mean_absolute_error(y_test, y_pred))
print('Mean Squared Error:', metrics.mean_squared_error(y_test, y_pred))
print('Root Mean Squared Error:', metrics.mean_squared_error(y_test, y_pred, squared=False))

# Plot the predictions against the actual values



Coefficients: [-0.04719763 -0.09205012 -0.17475958 -0.1623449   0.0377273  -0.04858991
  0.01553657  0.2022505  -0.01214083 -0.01731579 -0.01669767  0.
  0.00055883  0.          0.00065501 -0.00274843]
Intercept: 5.3995332532367115
Mean Absolute Error: 51.72353487314306
Mean Squared Error: 5282.049744004888
Root Mean Squared Error: 72.67771146647979


In [44]:
import pandas as pd

# Assuming you have new data for the years 2023 and beyond
# Assuming you have new data for the years 2023 and beyond
new_data = {
    'Type of motivation': ['Race or ethnicity', 'Religion', 'Sexual orientation', 'Language', 'Disability', 'Sex and gender', 'Age', 'Other similar factor', 'Unknown motivation'],
    2023: [700, 500, 180, 25, 15, 60, 10, 90, 120],  # Replace with your actual values
    2024: [750, 550, 200, 28, 18, 65, 12, 95, 130],  # Replace with your actual values
    # Add more years as needed
}

# Continue with the rest of the code to make predictions on the new data


# Create a DataFrame for the new data
new_df = pd.DataFrame(new_data)

# One-hot encode the 'Type of motivation' column
new_df_encoded = pd.get_dummies(new_df, columns=['Type of motivation'], drop_first=True)

# Ensure the columns in new_df_encoded match the columns used during training
missing_columns = set(df_encoded.columns) - set(new_df_encoded.columns)
for col in missing_columns:
    new_df_encoded[col] = 0

# Ensure the order of columns is the same
new_df_encoded = new_df_encoded[df_encoded.drop(columns='Type of motivation_Unknown motivation').columns]

# Make predictions on the new data
new_predictions = model.predict(new_df_encoded.values)

# Print or use the new predictions
print('Predictions for 2023 and beyond:', new_predictions)


Predictions for 2023 and beyond: [5.40009208 5.39953325 5.39678482 5.38283559 5.38221746 5.40018826
 5.39953325 5.39953325 5.39953325]


In [52]:

import pandas as pd

# Assuming you have new data for the years 2023 and beyond
# Assuming you have new data for the years 2023 and beyond
new_data = {
    'Type of motivation': ['Race or ethnicity', 'Religion', 'Sexual orientation', 'Language', 'Disability', 'Sex and gender', 'Age', 'Other similar factor', 'Unknown motivation'],
    2023: [700, 500, 180, 25, 15, 60, 10, 90, 120],  
    2024: [750, 550, 200, 28, 18, 65, 12, 95, 130],  
    # Add more years as needed
}

new_df = pd.DataFrame(new_data)

# One-hot encode the 'Type of motivation' column
new_df_encoded = pd.get_dummies(new_df, columns=['Type of motivation'], drop_first=True)

# Ensure the columns in new_df_encoded match the columns used during training
missing_columns = set(df_encoded.columns) - set(new_df_encoded.columns)
for col in missing_columns:
    new_df_encoded[col] = 0

# Ensure the order of columns is the same
new_df_encoded = new_df_encoded[df_encoded.drop(columns='Type of motivation_Unknown motivation').columns]

# Make predictions on the new data
new_predictions = model.predict(new_df_encoded.values)

# Display the predictions for the next 10 years
for year, prediction in zip(range(2023, 2033), new_predictions):
    print(f'Predicted hate crime for {year}: {prediction}')

Predicted hate crime for 2023: 5.4000920814557
Predicted hate crime for 2024: 5.3995332532367115
Predicted hate crime for 2025: 5.396784819515936
Predicted hate crime for 2026: 5.382835586981297
Predicted hate crime for 2027: 5.382217459564865
Predicted hate crime for 2028: 5.400188258751411
Predicted hate crime for 2029: 5.3995332532367115
Predicted hate crime for 2030: 5.3995332532367115
Predicted hate crime for 2031: 5.3995332532367115
