In [1]:
import os
from google.colab import drive

# Mount Google Drive into the Colab filesystem so the project folder is reachable
# under /content/drive (Colab-only: relies on the google.colab package).
drive.mount('/content/drive')

# 'project_root': The main folder containing the entire application structure.
# A later cell appends it to sys.path so that 'app.src' modules can be imported.
project_root = '/content/drive/MyDrive/story_generation_comprehension'

# 'dataset_path': Intended location for the generated synthetic data.
# NOTE(review): this only builds the path string; nothing here creates the directory.
dataset_path = os.path.join(project_root, 'story_generator', 'dataset')

Mounted at /content/drive


In [2]:
import random
from datetime import datetime, timedelta
import pandas as pd
import sys

# Ensure the project root is in the system path so we can import modules from 'app/src'.
# The membership check keeps re-runs of this cell from appending duplicate entries.
if project_root not in sys.path:
    sys.path.append(project_root)

# Seed Python's global `random` generator, which generate_synthetic_data() draws from.
# The seed is applied only when this cell runs: re-running the generation cell on its
# own continues the random stream and therefore yields different data.
random.seed(42)


## Generate synthetic game data

In [3]:
# Settings for the synthetic data generator
NUM_SAMPLES = 100_000  # Number of synthetic rows to generate
MAX_ROUNDS = 9  # Upper bound for successes / attempts drawn per sample
MAX_ROUNDS_mark = 100 * MAX_ROUNDS  # Marks available when every round counts for 100
MAX_SCORE_RANGE = 100  # Maximum score (XP) range
MAX_TIME_FOR_MAX_SCORE = 60 * 10  # Maximum time (seconds) for achieving max score
game_level = 15  # Highest game level
max_engagement_time = 60 * 15  # Upper bound (seconds) of the engagement time drawn per attempt

# Function to generate synthetic data
def generate_synthetic_data(num_samples=None, seed=None, progress_every=10000):
    """
    Build a list of synthetic game-session rows.

    Each row is a list in this order:
    [timestamp, success_count_mark, attempt_count, attempt_count_mark,
     score_total_level, game_level, engagement_time_total].

    Args:
        num_samples: Number of rows to generate. Defaults to the notebook-level
            NUM_SAMPLES when omitted, so existing zero-argument calls are unchanged.
        seed: Optional seed for Python's global `random` generator. When given, the
            generator is re-seeded first so the call is reproducible on its own;
            when None the current random state is used as-is.
        progress_every: Print a progress line every this many rows; 0 or None
            disables progress output.

    Returns:
        list[list]: One inner list per generated sample.
    """
    if num_samples is None:
        num_samples = NUM_SAMPLES
    if seed is not None:
        random.seed(seed)

    start_time = datetime(2025, 11, 1, 8, 0, 0)
    max_level = game_level  # notebook-level constant: highest level that can be drawn
    data = []

    for i in range(num_samples):
        # The order of the random draws below is significant: it is kept stable so a
        # given seed always produces the same dataset.
        timestamp = start_time + timedelta(minutes=random.randint(1, 720))  # within 12 hours of start_time
        level = random.randint(1, max_level)

        # Successes never exceed attempts, and attempts never exceed MAX_ROUNDS.
        success_count = random.randint(1, MAX_ROUNDS)
        success_count_mark = random.randint(1, success_count * 100)
        attempt_count = random.randint(success_count, MAX_ROUNDS)
        attempt_count_mark = attempt_count * 100

        # Total engagement time in seconds: one uniform draw per attempt.
        engagement_time_total = sum(
            random.uniform(60, max_engagement_time) for _ in range(attempt_count)
        )

        # Score (XP) for this sample, computed by the shared scoring function.
        score_total_level = calculate_game_score(
            success_count_mark, attempt_count_mark, attempt_count, engagement_time_total, MAX_ROUNDS
        )

        data.append([
            timestamp, success_count_mark, attempt_count, attempt_count_mark,
            score_total_level, level, engagement_time_total,
        ])

        # Print progress
        if progress_every and (i + 1) % progress_every == 0:
            print(f"Generated {i + 1} samples")

    return data


In [4]:
%%writefile /content/drive/MyDrive/story_generation_comprehension/app/src/calculate_game_score.py

# Constants
NUM_SAMPLES = 100000  # Not used by the scoring logic; kept so this module-level name stays available
MAX_SCORE_RANGE = 100  # Maximum score (XP) range
MAX_TIME_FOR_MAX_SCORE = 10*60  # Average seconds per attempt allowed before the time half of the score decays
game_level=15  # Not used by the scoring logic; kept so this module-level name stays available


def calculate_game_score(success_count_mark,attempt_count_mark, attempt_count, engagement_time_total, max_rounds):
    """
    Compute the game score (XP) as an integer between 0 and MAX_SCORE_RANGE.

    The score is the sum of two halves, each worth 50% of MAX_SCORE_RANGE:
      * accuracy half: success_count_mark / attempt_count_mark
      * time half: full credit while the average time per attempt is at most
        MAX_TIME_FOR_MAX_SCORE seconds, otherwise scaled down by
        MAX_TIME_FOR_MAX_SCORE / average_time.

    Args:
        success_count_mark: Marks earned.
        attempt_count_mark: Marks available for the attempts made.
        attempt_count: Number of attempts; used to average the engagement time.
        engagement_time_total: Total engagement time in seconds over all attempts.
        max_rounds: Accepted for interface compatibility; not used by the calculation.

    Returns:
        int: The score, truncated towards zero. Returns 0 when there were no
        attempts (attempt_count or attempt_count_mark is zero or negative).
    """
    max_score = MAX_SCORE_RANGE

    # Nothing was attempted: there is nothing to score and both ratios below
    # would divide by zero.
    if attempt_count <= 0 or attempt_count_mark <= 0:
        return 0

    # Accuracy ratio, clamped to [0, 1] so that inconsistent inputs (more success
    # marks than attempt marks) still yield a defined score instead of an error.
    accuracy = min(max(success_count_mark / attempt_count_mark, 0.0), 1.0)

    # Average time spent per attempt, in seconds.
    time_per_attempt = engagement_time_total / attempt_count
    if time_per_attempt <= MAX_TIME_FOR_MAX_SCORE:
        time_factor = 1.0
    else:
        time_factor = MAX_TIME_FOR_MAX_SCORE / time_per_attempt

    score = (accuracy * max_score*0.5) + (time_factor * max_score*0.5)
    score = min(score, max_score)

    return int(score)


Overwriting /content/drive/MyDrive/story_generation_comprehension/app/src/calculate_game_score.py


In [5]:
# Import the scoring function from the 'app.src' package.
# The module file is (re)written by the %%writefile cell above, so force a fresh load:
# a plain `from ... import` would return the copy already cached in sys.modules and
# silently ignore edits made to the file during this kernel session.
import importlib

_SCORE_MODULE_NAME = "app.src.calculate_game_score"

try:
    # Drop the import system's directory caches so a file created after the kernel
    # started is visible to the finder.
    importlib.invalidate_caches()
    if _SCORE_MODULE_NAME in sys.modules:
        _score_module = importlib.reload(sys.modules[_SCORE_MODULE_NAME])
    else:
        _score_module = importlib.import_module(_SCORE_MODULE_NAME)
    calculate_game_score = _score_module.calculate_game_score
    print("Successfully imported 'calculate_game_score' from app/src/")
except ImportError as e:
    print(f" Import Failed: {e}")
    print("Hint: Ensure 'project_root' is correctly added to sys.path in Cell 2.")
    # Later cells cannot run without calculate_game_score; stop here with the real
    # cause instead of failing further down with an unrelated NameError.
    raise

Successfully imported 'calculate_game_score' from app/src/


In [6]:
# 1. Generate the data
combined_data = generate_synthetic_data()

# 2. Convert to DataFrame (column order matches the row layout returned by the generator)
columns = [
    "timestamp", "success_count_mark", "attempt_count",
    "attempt_count_mark", "score_total_level", "game_level", "engagement_time_Total_sec"
]
df = pd.DataFrame(combined_data, columns=columns)

# 3. Save Raw Data
# Write into dataset_path (creating it if needed) so the file really ends up at the
# location reported below, not in the kernel's current working directory.
raw_filename = "combined_game_data.csv"
os.makedirs(dataset_path, exist_ok=True)
raw_path = os.path.join(dataset_path, raw_filename)
df.to_csv(raw_path, index=False)
print(f"Raw dataset saved to: {raw_path}")

Generated 10000 samples
Generated 20000 samples
Generated 30000 samples
Generated 40000 samples
Generated 50000 samples
Generated 60000 samples
Generated 70000 samples
Generated 80000 samples
Generated 90000 samples
Generated 100000 samples
Raw dataset saved to: /content/drive/MyDrive/story_generation_comprehension/story_generator/dataset/combined_game_data.csv


## calculate_improvement_score

In [7]:
def calculate_improvement_score(row):
    """
    Compute an improvement score, clamped to the range 1-10, for one data row.

    Four factors are each normalised to a 0-10 scale and combined in two steps:
      1. a weighted sum: success 30%, attempts 20%, level 20%, engagement efficiency 30%;
      2. an adjustment of (success + attempts + level - engagement) / 4 added on top.
    The effective weights are therefore 0.55 (success), 0.45 (attempts),
    0.45 (level) and 0.05 (engagement efficiency).

    Args:
        row: Mapping-like object (e.g. a DataFrame row) providing the keys
            "success_count_mark", "attempt_count_mark", "game_level",
            "attempt_count" and "engagement_time_Total_sec".

    Returns:
        The score, limited to at least 1 and at most 10.
    """
    # Limits are defined locally so the function is self-contained.
    MAX_ROUNDS_mark = 900
    MAX_LEVEL = 15
    MAX_TIME_PER_ATTEMPT_SEC = 15 * 60

    # 1. Normalize Factors (Scale 0-10)
    norm_success = (row["success_count_mark"] / MAX_ROUNDS_mark) * 10
    norm_attempt = (1 - (row["attempt_count_mark"] / MAX_ROUNDS_mark)) * 10  # fewer attempt marks -> higher value
    norm_level = (row["game_level"] / MAX_LEVEL) * 10

    # Engagement efficiency: less time relative to the allowed maximum -> higher value.
    # Avoid division by zero: a row without attempts gets no efficiency credit.
    max_possible_time = MAX_TIME_PER_ATTEMPT_SEC * row["attempt_count"]
    if max_possible_time > 0:
        norm_engagement = (1 - (row["engagement_time_Total_sec"] / max_possible_time)) * 10
    else:
        norm_engagement = 0.0

    # 2. Weighted Sum
    improvement = (0.3 * norm_success) + (0.2 * norm_attempt) + (0.2 * norm_level) + (0.3 * norm_engagement)

    # 3. Adjustment Factor
    # NOTE(review): norm_engagement is subtracted here although a higher value means
    # better efficiency, which cancels most of its 30% weight above. Kept as-is so
    # existing scores do not change; confirm whether the sign is intended.
    adjustment = (norm_success + norm_attempt + norm_level - norm_engagement) / 4
    improvement += adjustment

    # 4. Clamp result between 1 and 10
    return max(1, min(improvement, 10))

# Apply the function row by row and store the result as a new column
print("Calculating Improvement Scores...")
df["improvement_score"] = df.apply(calculate_improvement_score, axis=1)

# Save Final Processed Data
# Write into dataset_path (creating it if needed) so the file really ends up at the
# location reported below, not in the kernel's current working directory.
final_filename = "game_data_with_improvement.csv"
os.makedirs(dataset_path, exist_ok=True)
final_path = os.path.join(dataset_path, final_filename)
df.to_csv(final_path, index=False)
print(f"Final processed dataset saved to: {final_path}")

Calculating Improvement Scores...
Final processed dataset saved to: /content/drive/MyDrive/story_generation_comprehension/story_generator/dataset/game_data_with_improvement.csv
