# In [None]:
# Import necessary libraries
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error

# Read the four source CSV files (demographics, geographics, competitors,
# points of interest) from the working directory, one DataFrame per file.
# The files are read in the order listed below.
demographics_df, geographics_df, competitors_df, pois_df = [
    pd.read_csv(csv_name)
    for csv_name in (
        'demographics.csv',
        'geographics.csv',
        'competitors.csv',
        'pois.csv',
    )
]

# Explore the datasets: for each one show the first rows, the column/dtype
# summary and the descriptive statistics, under the same headings as before.
_datasets_to_explore = (
    ("Demographics:", demographics_df),
    ("\nGeographics:", geographics_df),
    ("\nCompetitors:", competitors_df),
    ("\nPOIs:", pois_df),
)

for _heading, _frame in _datasets_to_explore:
    print(_heading)
    print(_frame.head())
    # DataFrame.info() writes its report to stdout itself and returns None,
    # so it is called directly; wrapping it in print() would emit an extra
    # "None" line after every summary.
    _frame.info()
    print(_frame.describe())

# Select features and target variable.
# NOTE(review): the four datasets are combined column-wise by row index, which
# assumes row i describes the same entity in every file -- confirm, or merge on
# a shared key instead.
# NOTE(review): assumes 'education_level' is numeric, as required by a
# regression target -- TODO confirm.
target_column = 'education_level'
combined_df = pd.concat([demographics_df, geographics_df, competitors_df, pois_df], axis=1)

# Keep only the first occurrence of any column name shared between datasets
# so every feature has a unique label.
combined_df = combined_df.loc[:, ~combined_df.columns.duplicated()]

y = demographics_df[target_column]

# The target must not be one of the predictors: leaving it in lets the model
# read the answer directly and makes the reported error meaningless.
# LinearRegression only accepts numeric input, so non-numeric columns are
# left out of the feature matrix as well.
X = combined_df.drop(columns=[target_column]).select_dtypes(include=['number', 'bool'])

# Split the data into training and testing sets (80% / 20%, fixed seed so the
# split is reproducible).
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and train a linear regression model
model = LinearRegression()
model.fit(X_train, y_train)

# Make predictions on the testing set
y_pred = model.predict(X_test)

# Evaluate the model using mean squared error
mse = mean_squared_error(y_test, y_pred)
print(f'Mean Squared Error: {mse:.2f}')

# Use the model to make predictions on new data
new_data = pd.DataFrame({'age': [30, 40, 50], 'income': [60000, 80000, 100000]})

# predict() needs exactly the columns the model was fitted on, in the same
# order. Columns the model has never seen cannot be used, so fail loudly
# rather than silently discarding them.
unknown_columns = new_data.columns.difference(X.columns)
if not unknown_columns.empty:
    raise ValueError(
        f'new_data contains columns the model was not trained on: {list(unknown_columns)}'
    )

# Features that new_data does not provide are filled with their training-set
# mean so the rows can still be scored.
# NOTE(review): mean imputation is a modelling choice -- supply real values
# for every feature when they are available.
new_features = new_data.reindex(columns=X.columns).fillna(X_train.mean())
new_pred = model.predict(new_features)
print(new_pred)