# Analyzing Typing Behavior: A Data-Driven Approach to Predicting Essay Quality
 
## Importing Necessary Libraries
 
* Importing essential libraries for data manipulation, visualization, and machine learning.

In [None]:
#Importing the necessary libraries:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestRegressor
from sklearn.metrics import mean_squared_error
from xgboost import XGBRegressor



In [None]:
# Read the two competition CSV files into DataFrames: the logs first, then the scores.
train_logs = pd.read_csv(
    filepath_or_buffer='/kaggle/input/linking-writing-processes-to-writing-quality/train_logs.csv'
)
train_scores = pd.read_csv(
    filepath_or_buffer='/kaggle/input/linking-writing-processes-to-writing-quality/train_scores.csv'
)

## Merging Logs and Scores:

* Joining the logs to the essay scores on the shared `id` key, so that every log row also carries the score of the essay it belongs to.

In [None]:
# Attach the scores to the log rows with an inner join on the shared 'id' key.
# how='inner' is the pandas default; it is spelled out here to make the join type visible.
train_data = train_logs.merge(train_scores, how='inner', on='id')

In [None]:
# Typing-behavior features: group the merged rows by 'id' and reduce each
# listed column to one or more summary values per group.
# Because 'action_time' maps to a list of reducers, the resulting frame has
# two-level column labels such as ('action_time', 'sum').
typing_features = train_data.groupby(by='id').aggregate(
    {
        'activity': 'count',              # number of non-null activity entries
        'action_time': ['sum', 'mean'],   # total and average action time
        'word_count': 'max',              # largest word count seen
        'text_change': 'nunique',         # count of distinct text_change values
        'cursor_position': 'mean',        # average cursor position
    }
)

## Flatten the Multi-Level Column Index:

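* The aggregation above produces two-level column labels such as `('action_time', 'sum')`; joining the two levels with `_` gives flat names like `action_time_sum` that are easier to reference.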

In [None]:
# Flatten the multi-level column index: each (column, reducer) tuple becomes
# a single 'column_reducer' string.
# The MultiIndex guard keeps this cell safe to re-run. Once the labels are
# already plain strings, '_'.join(label) would iterate over the characters of
# each name and mangle it (e.g. 'word_count_max' -> 'w_o_r_d___c_o_u_n_t_...').
if isinstance(typing_features.columns, pd.MultiIndex):
    typing_features.columns = ['_'.join(col).strip() for col in typing_features.columns.values]


## Merging Typing Features with Scores:

* Merging the typing behavior features with essay scores.

In [None]:
# Combine the per-essay typing features with the scores via an inner join on 'id'.
# typing_features carries 'id' as its index (set by the groupby); pandas
# accepts an index level name as a merge key.
# how='inner' is the pandas default, written out for clarity.
features = pd.merge(typing_features, train_scores, how='inner', on='id')