### Recreating the original graphs

In [10]:
import json
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from typing import Dict, List

class PlotRecreator:
    """Recreate the analysis plots from JSON files stored in ``data_dir``.

    The constructor reads ``raw_speech_data.json``, ``trend_data.json``,
    ``polarization_data.json`` and ``metadata.json``. Each ``recreate_*``
    method renders one family of figures with matplotlib/seaborn and writes
    PNG files below the requested ``save_dir``.

    Attributes set by ``__init__``: ``data_dir``, ``show_std``, ``raw_data``,
    ``trend_data``, ``polarization_data``, ``metadata`` and ``raw_dfs`` (one
    DataFrame per key of ``raw_data``).
    """

    # Party code -> legend label; the dict order is the drawing order.
    _PARTY_NAMES = {'D': 'Democratic', 'R': 'Republican'}

    # Per metric: (column prefix, title fragment, y-axis label, file stem).
    _METRICS = (
        ('emotional', 'Emotional Intensity',
         'Average Emotional Intensity', 'emotional_intensity'),
        ('political', 'Political Spectrum Position',
         'Average Political Spectrum Position', 'political_spectrum'),
    )

    def __init__(self, data_dir: str = 'analysis_results', *,
                 show_std: bool = True):
        """Load every data file found in ``data_dir``.

        Args:
            data_dir: Directory containing the four JSON files.
            show_std: When true, trend plots shade a band of one
                ``<metric>_std`` around each ``<metric>_avg`` line.

        Raises:
            FileNotFoundError: If one of the JSON files is missing.
            json.JSONDecodeError: If a file does not contain valid JSON.
        """
        self.data_dir = data_dir
        self.show_std = show_std

        self.raw_data = self._load_json('raw_speech_data.json')
        self.trend_data = self._load_json('trend_data.json')
        self.polarization_data = self._load_json('polarization_data.json')
        self.metadata = self._load_json('metadata.json')

        # Build the per-congress DataFrames once; the heatmaps reuse them.
        self.raw_dfs: Dict[str, pd.DataFrame] = {
            congress: pd.DataFrame(rows)
            for congress, rows in self.raw_data.items()
        }

    def _load_json(self, filename: str):
        """Return the parsed content of ``filename`` inside ``self.data_dir``."""
        path = os.path.join(self.data_dir, filename)
        # Explicit UTF-8 so decoding does not depend on the platform locale.
        with open(path, 'r', encoding='utf-8') as f:
            return json.load(f)

    def _save_trend_figure(self, records_by_party, metric: str, title: str,
                           ylabel: str, out_path: str, **grid_kwargs):
        """Plot one ``<metric>_avg`` line per party and save it to ``out_path``.

        ``records_by_party`` maps a party code to the records for that party;
        parties with no records are skipped. Extra keyword arguments are
        forwarded to ``plt.grid``.
        """
        plt.figure(figsize=(12, 6))
        try:
            lines_drawn = 0
            for party, records in records_by_party.items():
                df = pd.DataFrame(records)
                if df.empty:
                    # Nothing to draw; an empty frame has no columns to index.
                    continue
                avg = df[f'{metric}_avg']
                plt.plot(df['congress'], avg, label=self._PARTY_NAMES[party])
                if self.show_std:
                    std = df[f'{metric}_std']
                    plt.fill_between(df['congress'], avg - std, avg + std,
                                     alpha=0.2)
                lines_drawn += 1

            plt.title(title)
            plt.xlabel('Congress')
            plt.ylabel(ylabel)
            plt.ylim(1, 5)
            plt.grid(True, **grid_kwargs)
            if lines_drawn:
                # legend() warns when there is no labelled artist to show.
                plt.legend()
            plt.savefig(out_path)
        finally:
            # Always release the figure, even if plotting or saving failed.
            plt.close()

    def recreate_party_trends(self, save_dir: str = 'recreated_plots'):
        """Recreate party-level trend plots (one PNG per metric)."""
        os.makedirs(save_dir, exist_ok=True)

        by_party = self.trend_data['by_party']
        # Both parties are required here: a missing one raises KeyError.
        records_by_party = {
            party: by_party[party] for party in self._PARTY_NAMES
        }
        for metric, name, ylabel, stem in self._METRICS:
            self._save_trend_figure(
                records_by_party, metric,
                f'{name} by Party Over Time', ylabel,
                os.path.join(save_dir, f'{stem}_by_party.png'),
                alpha=0.3,
            )

    def recreate_issue_trends(self, save_dir: str = 'recreated_plots'):
        """Recreate issue-level trend plots under ``<save_dir>/issues/<issue>/``."""
        base_dir = os.path.join(save_dir, 'issues')
        os.makedirs(base_dir, exist_ok=True)

        by_party_issue = self.trend_data['by_party_issue']
        for issue in self.metadata['valid_issues']:
            issue_dir = os.path.join(base_dir, issue.lower().replace(' ', '_'))
            os.makedirs(issue_dir, exist_ok=True)

            # Issues or parties absent from the trend data are simply not drawn.
            issue_trends = by_party_issue.get(issue, {})
            records_by_party = {
                party: issue_trends[party]
                for party in self._PARTY_NAMES
                if party in issue_trends
            }
            for metric, name, ylabel, stem in self._METRICS:
                self._save_trend_figure(
                    records_by_party, metric,
                    f'{name} Over Time: {issue}', ylabel,
                    os.path.join(issue_dir, f'{stem}.png'),
                )

    def recreate_polarization_trends(self, save_dir: str = 'recreated_plots'):
        """Recreate the overall polarization plot (emotional and political gap)."""
        os.makedirs(save_dir, exist_ok=True)

        df = pd.DataFrame(self.polarization_data['overall'])
        plt.figure(figsize=(12, 6))
        try:
            plt.plot(df['congress'], df['emotional_gap'], label='Emotional Gap')
            plt.plot(df['congress'], df['political_gap'], label='Political Gap')
            plt.title('Party Polarization Over Time')
            plt.xlabel('Congress')
            plt.ylabel('Party Gap')
            plt.ylim(-4, 4)
            plt.grid(True, alpha=0.3)
            plt.legend()
            plt.savefig(os.path.join(save_dir, 'overall_polarization.png'))
        finally:
            plt.close()

    def _issue_matrices(self):
        """Aggregate the raw data into issue x congress matrices.

        Returns:
            ``(congresses, issues, prevalence, emotional, political)`` where
            the first two are the column/row labels and the last three are
            ``len(issues) x len(congresses)`` float arrays. Cells without any
            matching row stay at 0.
        """
        congresses = sorted(self.raw_data.keys(), key=int)
        issues = list(self.metadata['valid_issues'])

        shape = (len(issues), len(congresses))
        prevalence = np.zeros(shape)
        emotional = np.zeros(shape)
        political = np.zeros(shape)

        # Resolve each issue's label once instead of once per congress.
        issue_map = self.metadata['issue_map']
        labels = []
        for issue in issues:
            label = next((k for k, v in issue_map.items() if v == issue), None)
            if label is None:
                print(f"   No label maps to issue {issue!r}; "
                      "its heatmap row is left at 0.")
            labels.append(label)

        for j, congress in enumerate(congresses):
            df = self.raw_dfs[congress]
            if df.empty:
                # No rows for this congress: the whole column stays at 0.
                continue
            for i, label in enumerate(labels):
                if label is None:
                    continue
                # Presumably each 'issues' value is a collection of labels;
                # the test is a plain `in` membership check - TODO confirm.
                mask = df['issues'].apply(
                    lambda tags, label=label: label in tags)
                matching = df[mask]
                if matching.empty:
                    continue
                prevalence[i, j] = len(matching) / len(df) * 100
                emotional[i, j] = matching['emotional_intensity'].mean()
                political[i, j] = matching['political_spectrum'].mean()

        return congresses, issues, prevalence, emotional, political

    def _save_heatmap(self, matrix, congresses: List[str], issues: List[str],
                      title: str, out_path: str, **heatmap_kwargs):
        """Render ``matrix`` as an annotated heatmap and save it to ``out_path``."""
        plt.figure(figsize=(15, 8))
        try:
            sns.heatmap(matrix,
                        xticklabels=congresses,
                        yticklabels=issues,
                        annot=True,
                        **heatmap_kwargs)
            plt.title(title)
            plt.xlabel('Congress')
            plt.ylabel('Issue')
            plt.tight_layout()
            plt.savefig(out_path)
        finally:
            plt.close()

    def recreate_issue_heatmaps(self, save_dir: str = 'recreated_plots'):
        """Recreate the issue heatmaps under ``<save_dir>/issue_heatmaps/``."""
        heatmap_dir = os.path.join(save_dir, 'issue_heatmaps')
        os.makedirs(heatmap_dir, exist_ok=True)

        congresses, issues, prevalence, emotional, political = \
            self._issue_matrices()

        self._save_heatmap(
            prevalence, congresses, issues,
            'Issue Prevalence Over Time (%)',
            os.path.join(heatmap_dir, 'issue_prevalence.png'),
            cmap='YlOrRd', fmt='.1f',
        )
        self._save_heatmap(
            emotional, congresses, issues,
            'Average Emotional Intensity by Issue Over Time',
            os.path.join(heatmap_dir, 'issue_emotional.png'),
            cmap='RdBu_r', fmt='.2f', vmin=1, vmax=5,
        )
        self._save_heatmap(
            political, congresses, issues,
            'Average Political Position by Issue Over Time',
            os.path.join(heatmap_dir, 'issue_political.png'),
            cmap='RdBu_r', fmt='.2f', vmin=1, vmax=5,
        )

    def recreate_all_plots(self, save_dir: str = 'recreated_plots'):
        """Recreate all plots, printing a progress line before each step."""
        print("Recreating all plots...")

        # Create main directory
        os.makedirs(save_dir, exist_ok=True)

        print("1. Recreating party trends...")
        self.recreate_party_trends(save_dir)

        print("2. Recreating issue trends...")
        self.recreate_issue_trends(save_dir)

        print("3. Recreating polarization trends...")
        self.recreate_polarization_trends(save_dir)

        print("4. Recreating issue heatmaps...")
        self.recreate_issue_heatmaps(save_dir)

        print("\nAll plots have been recreated successfully!")
        print(f"Output directory: {save_dir}")

def main():
    """Load 'analysis_results' and regenerate every plot in 'recreated_plots'."""
    # Build the recreator and render all figures in a single chained call.
    PlotRecreator(data_dir='analysis_results').recreate_all_plots(
        save_dir='recreated_plots'
    )

if __name__ == "__main__":
    main()

Recreating all plots...
1. Recreating party trends...
2. Recreating issue trends...
3. Recreating polarization trends...
4. Recreating issue heatmaps...

All plots have been recreated successfully!
Output directory: recreated_plots


### Recreating the graphs without standard-deviation shading

In [11]:
import json
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from typing import Dict, List

class PlotRecreator:
    """Recreate the analysis plots, by default without std shading.

    The constructor reads ``raw_speech_data.json``, ``trend_data.json``,
    ``polarization_data.json`` and ``metadata.json`` from ``data_dir``. Each
    ``recreate_*`` method renders one family of figures with
    matplotlib/seaborn and writes PNG files below the requested ``save_dir``.

    Attributes set by ``__init__``: ``data_dir``, ``show_std``, ``raw_data``,
    ``trend_data``, ``polarization_data``, ``metadata`` and ``raw_dfs`` (one
    DataFrame per key of ``raw_data``).
    """

    # Party code -> legend label; the dict order is the drawing order.
    _PARTY_NAMES = {'D': 'Democratic', 'R': 'Republican'}

    # Per metric: (column prefix, title fragment, y-axis label, file stem).
    _METRICS = (
        ('emotional', 'Emotional Intensity',
         'Average Emotional Intensity', 'emotional_intensity'),
        ('political', 'Political Spectrum Position',
         'Average Political Spectrum Position', 'political_spectrum'),
    )

    def __init__(self, data_dir: str = 'analysis_results', *,
                 show_std: bool = False):
        """Load every data file found in ``data_dir``.

        Args:
            data_dir: Directory containing the four JSON files.
            show_std: When true, trend plots shade a band of one
                ``<metric>_std`` around each ``<metric>_avg`` line. The
                default draws the average lines only.

        Raises:
            FileNotFoundError: If one of the JSON files is missing.
            json.JSONDecodeError: If a file does not contain valid JSON.
        """
        self.data_dir = data_dir
        self.show_std = show_std

        self.raw_data = self._load_json('raw_speech_data.json')
        self.trend_data = self._load_json('trend_data.json')
        self.polarization_data = self._load_json('polarization_data.json')
        self.metadata = self._load_json('metadata.json')

        # Build the per-congress DataFrames once; the heatmaps reuse them.
        self.raw_dfs: Dict[str, pd.DataFrame] = {
            congress: pd.DataFrame(rows)
            for congress, rows in self.raw_data.items()
        }

    def _load_json(self, filename: str):
        """Return the parsed content of ``filename`` inside ``self.data_dir``."""
        path = os.path.join(self.data_dir, filename)
        # Explicit UTF-8 so decoding does not depend on the platform locale.
        with open(path, 'r', encoding='utf-8') as f:
            return json.load(f)

    def _save_trend_figure(self, records_by_party, metric: str, title: str,
                           ylabel: str, out_path: str, **grid_kwargs):
        """Plot one ``<metric>_avg`` line per party and save it to ``out_path``.

        ``records_by_party`` maps a party code to the records for that party;
        parties with no records are skipped. Extra keyword arguments are
        forwarded to ``plt.grid``.
        """
        plt.figure(figsize=(12, 6))
        try:
            lines_drawn = 0
            for party, records in records_by_party.items():
                df = pd.DataFrame(records)
                if df.empty:
                    # Nothing to draw; an empty frame has no columns to index.
                    continue
                avg = df[f'{metric}_avg']
                plt.plot(df['congress'], avg, label=self._PARTY_NAMES[party])
                if self.show_std:
                    # Only read the std column when shading is requested.
                    std = df[f'{metric}_std']
                    plt.fill_between(df['congress'], avg - std, avg + std,
                                     alpha=0.2)
                lines_drawn += 1

            plt.title(title)
            plt.xlabel('Congress')
            plt.ylabel(ylabel)
            plt.ylim(1, 5)
            plt.grid(True, **grid_kwargs)
            if lines_drawn:
                # legend() warns when there is no labelled artist to show.
                plt.legend()
            plt.savefig(out_path)
        finally:
            # Always release the figure, even if plotting or saving failed.
            plt.close()

    def recreate_party_trends(self, save_dir: str = 'recreated_plots'):
        """Recreate party-level trend plots (one PNG per metric)."""
        os.makedirs(save_dir, exist_ok=True)

        by_party = self.trend_data['by_party']
        # Both parties are required here: a missing one raises KeyError.
        records_by_party = {
            party: by_party[party] for party in self._PARTY_NAMES
        }
        for metric, name, ylabel, stem in self._METRICS:
            self._save_trend_figure(
                records_by_party, metric,
                f'{name} by Party Over Time', ylabel,
                os.path.join(save_dir, f'{stem}_by_party.png'),
                alpha=0.3,
            )

    def recreate_issue_trends(self, save_dir: str = 'recreated_plots'):
        """Recreate issue-level trend plots under ``<save_dir>/issues/<issue>/``."""
        base_dir = os.path.join(save_dir, 'issues')
        os.makedirs(base_dir, exist_ok=True)

        by_party_issue = self.trend_data['by_party_issue']
        for issue in self.metadata['valid_issues']:
            issue_dir = os.path.join(base_dir, issue.lower().replace(' ', '_'))
            os.makedirs(issue_dir, exist_ok=True)

            # Issues or parties absent from the trend data are simply not drawn.
            issue_trends = by_party_issue.get(issue, {})
            records_by_party = {
                party: issue_trends[party]
                for party in self._PARTY_NAMES
                if party in issue_trends
            }
            for metric, name, ylabel, stem in self._METRICS:
                self._save_trend_figure(
                    records_by_party, metric,
                    f'{name} Over Time: {issue}', ylabel,
                    os.path.join(issue_dir, f'{stem}.png'),
                )

    def recreate_polarization_trends(self, save_dir: str = 'recreated_plots'):
        """Recreate the overall polarization plot (emotional and political gap)."""
        os.makedirs(save_dir, exist_ok=True)

        df = pd.DataFrame(self.polarization_data['overall'])
        plt.figure(figsize=(12, 6))
        try:
            plt.plot(df['congress'], df['emotional_gap'], label='Emotional Gap')
            plt.plot(df['congress'], df['political_gap'], label='Political Gap')
            plt.title('Party Polarization Over Time')
            plt.xlabel('Congress')
            plt.ylabel('Party Gap')
            plt.ylim(-4, 4)
            plt.grid(True, alpha=0.3)
            plt.legend()
            plt.savefig(os.path.join(save_dir, 'overall_polarization.png'))
        finally:
            plt.close()

    def _issue_matrices(self):
        """Aggregate the raw data into issue x congress matrices.

        Returns:
            ``(congresses, issues, prevalence, emotional, political)`` where
            the first two are the column/row labels and the last three are
            ``len(issues) x len(congresses)`` float arrays. Cells without any
            matching row stay at 0.
        """
        congresses = sorted(self.raw_data.keys(), key=int)
        issues = list(self.metadata['valid_issues'])

        shape = (len(issues), len(congresses))
        prevalence = np.zeros(shape)
        emotional = np.zeros(shape)
        political = np.zeros(shape)

        # Resolve each issue's label once instead of once per congress.
        issue_map = self.metadata['issue_map']
        labels = []
        for issue in issues:
            label = next((k for k, v in issue_map.items() if v == issue), None)
            if label is None:
                print(f"   No label maps to issue {issue!r}; "
                      "its heatmap row is left at 0.")
            labels.append(label)

        for j, congress in enumerate(congresses):
            df = self.raw_dfs[congress]
            if df.empty:
                # No rows for this congress: the whole column stays at 0.
                continue
            for i, label in enumerate(labels):
                if label is None:
                    continue
                # Presumably each 'issues' value is a collection of labels;
                # the test is a plain `in` membership check - TODO confirm.
                mask = df['issues'].apply(
                    lambda tags, label=label: label in tags)
                matching = df[mask]
                if matching.empty:
                    continue
                prevalence[i, j] = len(matching) / len(df) * 100
                emotional[i, j] = matching['emotional_intensity'].mean()
                political[i, j] = matching['political_spectrum'].mean()

        return congresses, issues, prevalence, emotional, political

    def _save_heatmap(self, matrix, congresses: List[str], issues: List[str],
                      title: str, out_path: str, **heatmap_kwargs):
        """Render ``matrix`` as an annotated heatmap and save it to ``out_path``."""
        plt.figure(figsize=(15, 8))
        try:
            sns.heatmap(matrix,
                        xticklabels=congresses,
                        yticklabels=issues,
                        annot=True,
                        **heatmap_kwargs)
            plt.title(title)
            plt.xlabel('Congress')
            plt.ylabel('Issue')
            plt.tight_layout()
            plt.savefig(out_path)
        finally:
            plt.close()

    def recreate_issue_heatmaps(self, save_dir: str = 'recreated_plots'):
        """Recreate the issue heatmaps under ``<save_dir>/issue_heatmaps/``."""
        heatmap_dir = os.path.join(save_dir, 'issue_heatmaps')
        os.makedirs(heatmap_dir, exist_ok=True)

        congresses, issues, prevalence, emotional, political = \
            self._issue_matrices()

        self._save_heatmap(
            prevalence, congresses, issues,
            'Issue Prevalence Over Time (%)',
            os.path.join(heatmap_dir, 'issue_prevalence.png'),
            cmap='YlOrRd', fmt='.1f',
        )
        self._save_heatmap(
            emotional, congresses, issues,
            'Average Emotional Intensity by Issue Over Time',
            os.path.join(heatmap_dir, 'issue_emotional.png'),
            cmap='RdBu_r', fmt='.2f', vmin=1, vmax=5,
        )
        self._save_heatmap(
            political, congresses, issues,
            'Average Political Position by Issue Over Time',
            os.path.join(heatmap_dir, 'issue_political.png'),
            cmap='RdBu_r', fmt='.2f', vmin=1, vmax=5,
        )

    def recreate_all_plots(self, save_dir: str = 'recreated_plots'):
        """Recreate all plots, printing a progress line before each step."""
        print("Recreating all plots...")

        # Create main directory
        os.makedirs(save_dir, exist_ok=True)

        print("1. Recreating party trends...")
        self.recreate_party_trends(save_dir)

        print("2. Recreating issue trends...")
        self.recreate_issue_trends(save_dir)

        print("3. Recreating polarization trends...")
        self.recreate_polarization_trends(save_dir)

        print("4. Recreating issue heatmaps...")
        self.recreate_issue_heatmaps(save_dir)

        print("\nAll plots have been recreated successfully!")
        print(f"Output directory: {save_dir}")

def main():
    """Regenerate every plot from 'analysis_results' into 'recreated_plots'."""
    source_dir = 'analysis_results'
    output_dir = 'recreated_plots'

    # NOTE: files already present in the output directory under the same
    # names are replaced by this run.
    plotter = PlotRecreator(data_dir=source_dir)
    plotter.recreate_all_plots(save_dir=output_dir)

if __name__ == "__main__":
    main()

Recreating all plots...
1. Recreating party trends...
2. Recreating issue trends...
3. Recreating polarization trends...
4. Recreating issue heatmaps...

All plots have been recreated successfully!
Output directory: recreated_plots
