# CommitTrader - Example Analysis

This notebook demonstrates how to use CommitTrader to analyze the relationship between GitHub activity and stock prices.

In [None]:
import json
import os
import sys
from datetime import datetime, timedelta

import matplotlib.pyplot as plt
import pandas as pd

# Add the parent directory to the import path before importing from `src`.
sys.path.append('..')

from src.analysis.event_study import EventStudy
from src.analysis.pipeline import AnalysisPipeline
from src.analysis.statistics import StatisticalTests
from src.data.company_mapper import CompanyMapper
from src.data.github_collector import GitHubCollector
from src.data.stock_collector import StockCollector
from src.data.storage import DataStorage
from src.visualization.plots import ResultsVisualizer

# Suppress warnings
import warnings
warnings.filterwarnings('ignore')

## 1. Setup and Configuration

In [None]:
# Initialize components.
# The GitHub token is read from the GITHUB_TOKEN environment variable rather
# than being pasted into the notebook, so the secret is never saved with the
# file. When the variable is unset this evaluates to None.
github_token = os.environ.get('GITHUB_TOKEN')

github_collector = GitHubCollector(github_token)
stock_collector = StockCollector()
mapper = CompanyMapper()
storage = DataStorage()
visualizer = ResultsVisualizer()

# Check rate limits: report the remaining and total core API quota.
rate_info = github_collector.get_rate_limit_info()
print(f"GitHub API Rate Limit: {rate_info['core_remaining']}/{rate_info['core_limit']}")

## 2. Explore Company Mappings

In [None]:
# Grab the company mappings from the mapper and report their size:
# distinct values in the 'ticker' column, and the number of rows.
mappings = mapper.mappings
company_count = mappings['ticker'].nunique()
repo_count = len(mappings)
print(f"Total companies: {company_count}")
print(f"Total repositories: {repo_count}")

# Preview the first ten rows.
mappings.head(10)

In [None]:
# Bar chart of the number of distinct tickers in each sector.
tickers_per_sector = mappings.groupby('sector')['ticker'].nunique()
sector_ax = tickers_per_sector.plot.bar(figsize=(10, 6))
sector_ax.set_title('Number of Companies by Sector')
sector_ax.set_ylabel('Count')
plt.show()

## 3. Collect GitHub Events (Example: Single Company)

In [None]:
# Example: releases of the microsoft/vscode repository for 2022-2023.
repo = 'microsoft/vscode'
start_date, end_date = datetime(2022, 1, 1), datetime(2023, 12, 31)

releases = github_collector.collect_releases(repo, start_date, end_date)
release_count = len(releases)
print(f"Found {release_count} releases")

# Preview the first rows of the collected releases.
releases.head()

## 4. Collect Stock Data

In [None]:
# Fetch MSFT price data for the same date range used for the releases.
msft_stock = stock_collector.get_stock_data('MSFT', start_date, end_date)

# Line chart of the 'close' column against the frame's index.
price_fig, price_ax = plt.subplots(figsize=(12, 6))
price_ax.plot(msft_stock.index, msft_stock['close'])
price_ax.set_title('Microsoft Stock Price')
price_ax.set_xlabel('Date')
price_ax.set_ylabel('Price ($)')
price_ax.grid(True, alpha=0.3)
plt.show()

# Derive returns from the price data and print their mean and standard
# deviation as percentages.
returns = stock_collector.calculate_returns(msft_stock)
mean_return_pct = returns.mean() * 100
volatility_pct = returns.std() * 100
print(f"Mean daily return: {mean_return_pct:.4f}%")
print(f"Volatility: {volatility_pct:.4f}%")

## 5. Run Event Study (Single Event)

In [None]:
# EventStudy is imported in the notebook's import cell.
event_study = EventStudy()

# Analyze a single release: the first row of `releases`, in whatever order
# the collector returned them.
if releases.empty:
    # Say so explicitly rather than producing no output at all.
    print("No releases found for the selected date range; skipping the single-event study.")
else:
    first_release = releases.iloc[0]

    result = event_study.analyze_event(
        ticker='MSFT',
        event_date=first_release['published_at'],
        event_type='release',
        event_metadata={'tag': first_release['tag_name']}
    )

    # Values are scaled by 100 and shown as percentages.
    print("Event Study Results:")
    print(f"Event Date: {result['event_date']}")
    print(f"AR (Day 0): {result['ar_day_0']*100:.4f}%")
    print(f"CAR (-5, 5): {result['CAR_-5_5']*100:.4f}%")
    print(f"Valid: {result['valid']}")

## 6. Run Full Analysis Pipeline

In [None]:
# Build the pipeline with the notebook's GitHub token.
pipeline = AnalysisPipeline(github_token=github_token)

# Three tickers over calendar year 2023 (use max_events for quick testing).
analysis_args = {
    'tickers': ['MSFT', 'GOOGL', 'META'],
    'start_date': datetime(2023, 1, 1),
    'end_date': datetime(2023, 12, 31),
    'max_events': 50,  # Limit for testing
}
summary = pipeline.run_full_analysis(**analysis_args)

In [None]:
# Print the pipeline summary as indented JSON. `default=str` converts any
# value that json cannot serialize natively to its string form.
# (json is imported in the notebook's import cell.)
print(json.dumps(summary, indent=2, default=str))

## 7. Load and Visualize Results

In [None]:
# Load latest results
results = storage.load_event_study_results('full_analysis')

# Stays None when nothing was loaded, in which case the final line of the
# cell displays nothing.
valid_stats = None

if results is not None:
    print(f"Loaded {len(results)} event study results")

    # Filter valid results
    valid_results = results[results['valid'] == True]
    print(f"Valid results: {len(valid_results)}")

    # Descriptive statistics for the day-0 AR column and the three CAR columns.
    valid_stats = valid_results[['ar_day_0', 'CAR_0_0', 'CAR_-1_1', 'CAR_-5_5']].describe()
else:
    print("No saved 'full_analysis' results found; run the analysis pipeline first.")

# Jupyter only renders a bare expression when it is the last top-level
# statement of the cell; placed inside the `if` block it would be discarded.
valid_stats

In [None]:
# Plot CAR distribution.
# `results` is None when no saved results could be loaded, so guard the plot
# the same way the loading and dashboard cells do.
if results is not None:
    fig = visualizer.plot_car_distribution(results, car_column='CAR_-5_5')
    plt.show()
else:
    print("No event study results loaded; skipping the CAR distribution plot.")

In [None]:
# Plot AR by event type.
# `results` is None when no saved results could be loaded, so guard the plot
# the same way the loading and dashboard cells do.
if results is not None:
    fig = visualizer.plot_ar_by_event_type(results, ar_column='ar_day_0')
    plt.show()
else:
    print("No event study results loaded; skipping the AR-by-event-type plot.")

## 8. Statistical Analysis

In [None]:
# StatisticalTests is imported in the notebook's import cell.
stats_tests = StatisticalTests()

# Run all statistical tests on the day-0 abnormal-return column.
test_results = stats_tests.perform_all_tests(results, ar_column='ar_day_0')

# Create summary table and display it as the cell output.
summary_table = stats_tests.create_summary_table(test_results)
summary_table

In [None]:
# Render the significance chart from the statistical test results.
fig = visualizer.plot_statistical_significance(
    test_results,
)
plt.show()

## 9. Aggregate Analysis by Event Type

In [None]:
# EventStudy is imported in the notebook's import cell. A fresh instance is
# created here so this section does not rely on the single-event section
# having been run.
event_study = EventStudy()

# Aggregate results by event type and display the aggregate as the cell output.
aggregated = event_study.aggregate_results(results, group_by='event_type')
aggregated

## 10. Create Summary Dashboard

In [None]:
# Fetch the stored events to show alongside the event-study results.
events = storage.load_events('all_events')

# Build the dashboard only when both the events and the results are available.
if not (events is None or results is None):
    # Create comprehensive dashboard
    dashboard_inputs = {
        'events': events,
        'results': results,
        'aggregated': aggregated,
        'statistical_tests': test_results,
    }
    fig = visualizer.create_summary_dashboard(**dashboard_inputs)
    plt.show()

## Conclusion

This notebook demonstrates the basic workflow for CommitTrader:

1. **Data Collection**: Gather GitHub events and stock prices
2. **Event Study**: Calculate abnormal returns for each event
3. **Statistical Testing**: Test significance of results
4. **Visualization**: Create plots and dashboards
5. **Interpretation**: Analyze the relationship between GitHub activity and stock prices

### Next Steps

- Expand the company list in `data/mappings/company_repo_mappings.csv`
- Experiment with different event windows in `config.yaml`
- Try different expected return models (market, mean-adjusted, market-adjusted)
- Analyze specific sectors or repository characteristics
- Export results for further analysis or publication