# Mental Rotation Research Analysis

Exploratory analysis of mental rotation articles collected from Google Scholar, covering publication trends over time and citation counts.

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

# Apply seaborn's 'whitegrid' style to the plots drawn in this notebook.
sns.set_style('whitegrid')
# IPython magic (not plain Python): render matplotlib figures inline in the
# notebook output. This line only works when run under IPython/Jupyter.
%matplotlib inline

## Load Data

In [None]:
# Find the most recent data file.
# NOTE(review): "most recent" relies on lexicographic filename order, which
# only matches chronological order if the part of the name after
# 'mental_rotation_articles_' is a sortable timestamp -- confirm against
# scripts/collect_articles.py.
data_dir = Path('../data')
csv_files = sorted(data_dir.glob('mental_rotation_articles_*.csv'))

if not csv_files:
    # Stop here with an actionable error. Merely printing would leave `df`
    # undefined and make the next cell fail with an unrelated NameError.
    raise FileNotFoundError(
        "No data files found. Run scripts/collect_articles.py first."
    )

latest_file = csv_files[-1]
df = pd.read_csv(latest_file)
print(f"Loaded {len(df)} articles from {latest_file.name}")

## Exploratory Data Analysis

In [None]:
# Print a concise summary of the loaded table: df.info() reports the columns,
# their non-null counts and dtypes, and the memory usage.
df.info()

In [None]:
# Preview the first rows of the table (head() returns 5 rows by default).
# As the last expression in the cell, the result is rendered by the notebook.
df.head()

## Publication Trends

In [None]:
# Publications by year
# Coerce 'year' to numeric so unparseable entries become NaN. The column is
# overwritten in place because the summary cell formats it as a number.
df['year'] = pd.to_numeric(df['year'], errors='coerce')

# Count articles per year on integer years; rows without a usable year are
# dropped explicitly before counting.
year_counts = df['year'].dropna().astype(int).value_counts().sort_index()

# value_counts() omits years that have no articles. Insert them with a count
# of 0 so the line plot shows real gaps instead of interpolating across them.
if not year_counts.empty:
    all_years = range(int(year_counts.index.min()),
                      int(year_counts.index.max()) + 1)
    year_counts = year_counts.reindex(all_years, fill_value=0)

plt.figure(figsize=(12, 6))
plt.plot(year_counts.index, year_counts.values, marker='o')
plt.xlabel('Year')
plt.ylabel('Number of Publications')
plt.title('Mental Rotation Publications Over Time')
plt.grid(True, alpha=0.3)
plt.show()

## Citation Analysis

In [None]:
# Ten articles with the largest 'citation_count', trimmed to the columns
# worth showing in the printed table.
display_columns = ['title', 'author', 'year', 'citation_count']
top_cited = df.nlargest(n=10, columns='citation_count').loc[:, display_columns]

print("Top 10 Most Cited Articles:")
# index=False hides the DataFrame row labels in the text output.
print(top_cited.to_string(index=False))

In [None]:
# Histogram of per-article citation counts, split into 30 bins and drawn
# through the Axes object rather than the pyplot state machine.
fig, ax = plt.subplots(figsize=(10, 6))
ax.hist(df['citation_count'], bins=30, edgecolor='black')
ax.set_xlabel('Citation Count')
ax.set_ylabel('Number of Articles')
ax.set_title('Distribution of Citations')
plt.show()

## Summary Statistics

In [None]:
# Headline figures for the dataset, assembled first and then printed one per
# line. The ':.0f' / ':.1f' format specs require 'year' and 'citation_count'
# to be numeric at this point.
summary_lines = [
    f"Total articles: {len(df)}",
    f"Year range: {df['year'].min():.0f} - {df['year'].max():.0f}",
    f"Average citations: {df['citation_count'].mean():.1f}",
    f"Median citations: {df['citation_count'].median():.1f}",
    f"Most cited: {df['citation_count'].max():.0f}",
]
print("\n".join(summary_lines))