### original graphs recreation

In [26]:
import json
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from typing import Dict, List

class PlotRecreator:
    """Recreate the saved analysis figures from the JSON exports in ``data_dir``.

    Four files are read on construction: ``raw_speech_data.json``,
    ``trend_data.json``, ``polarization_data.json`` and ``metadata.json``.
    Every ``recreate_*`` method writes PNG files below its ``save_dir``.
    """

    # Party code -> legend label, in plotting order.
    _PARTY_LABELS = {'D': 'Democratic', 'R': 'Republican'}

    def __init__(self, data_dir: str = 'analysis_results', show_std: bool = True):
        """Load every data file found under ``data_dir``.

        Args:
            data_dir: Directory containing the four JSON exports.
            show_std: When True (default), trend plots shade a band of
                +/- one ``*_std`` around each party's ``*_avg`` line.
        """
        self.data_dir = data_dir
        self.show_std = show_std

        # Load all data files
        self.raw_data = self._load_json('raw_speech_data.json')
        self.trend_data = self._load_json('trend_data.json')
        self.polarization_data = self._load_json('polarization_data.json')
        self.metadata = self._load_json('metadata.json')

        # One DataFrame per congress, built once and reused by the heatmaps.
        self.raw_dfs = {
            congress: pd.DataFrame(records)
            for congress, records in self.raw_data.items()
        }

    def _load_json(self, filename: str):
        """Return the parsed content of ``filename`` inside ``self.data_dir``."""
        with open(os.path.join(self.data_dir, filename), 'r') as f:
            return json.load(f)

    def _save_trend_figure(self, records_by_party, metric: str, title: str,
                           ylabel: str, out_path: str, grid_alpha=None):
        """Draw one average-per-congress line per party and save the figure.

        Args:
            records_by_party: Mapping of party code to a list of records with
                ``congress``, ``{metric}_avg`` and (when shading)
                ``{metric}_std`` fields. Parties absent from it are skipped.
            metric: Column prefix, ``'emotional'`` or ``'political'``.
            title: Figure title.
            ylabel: Y-axis label.
            out_path: Destination PNG path.
            grid_alpha: Grid transparency; ``None`` keeps matplotlib's default.
        """
        plt.figure(figsize=(12, 6))
        for party, label in self._PARTY_LABELS.items():
            if party not in records_by_party:
                continue
            df = pd.DataFrame(records_by_party[party])
            avg = df[f'{metric}_avg']
            plt.plot(df['congress'], avg, label=label)
            if self.show_std:
                std = df[f'{metric}_std']
                plt.fill_between(df['congress'], avg - std, avg + std, alpha=0.2)

        plt.title(title)
        plt.xlabel('Congress')
        plt.ylabel(ylabel)
        plt.ylim(1, 5)
        if grid_alpha is None:
            plt.grid(True)
        else:
            plt.grid(True, alpha=grid_alpha)
        plt.legend()
        plt.savefig(out_path)
        plt.close()

    def recreate_party_trends(self, save_dir: str = 'recreated_plots'):
        """Recreate the two party-level trend plots (emotional, political)."""
        os.makedirs(save_dir, exist_ok=True)

        by_party = self.trend_data['by_party']
        # Both parties are required here; a missing one raises KeyError.
        records = {party: by_party[party] for party in self._PARTY_LABELS}

        self._save_trend_figure(
            records, 'emotional',
            'Emotional Intensity by Party Over Time',
            'Average Emotional Intensity',
            f"{save_dir}/emotional_intensity_by_party.png",
            grid_alpha=0.3)
        self._save_trend_figure(
            records, 'political',
            'Political Spectrum Position by Party Over Time',
            'Average Political Spectrum Position',
            f"{save_dir}/political_spectrum_by_party.png",
            grid_alpha=0.3)

    def recreate_issue_trends(self, save_dir: str = 'recreated_plots'):
        """Recreate per-issue trend plots under ``<save_dir>/issues/<issue>/``."""
        base_dir = os.path.join(save_dir, 'issues')
        os.makedirs(base_dir, exist_ok=True)

        by_party_issue = self.trend_data['by_party_issue']
        for issue in self.metadata['valid_issues']:
            issue_dir = os.path.join(base_dir, issue.lower().replace(' ', '_'))
            os.makedirs(issue_dir, exist_ok=True)

            # Issues or parties without trend data simply produce no line.
            records = by_party_issue.get(issue, {})

            self._save_trend_figure(
                records, 'emotional',
                f'Emotional Intensity Over Time: {issue}',
                'Average Emotional Intensity',
                f"{issue_dir}/emotional_intensity.png")
            self._save_trend_figure(
                records, 'political',
                f'Political Spectrum Position Over Time: {issue}',
                'Average Political Spectrum Position',
                f"{issue_dir}/political_spectrum.png")

    def recreate_polarization_trends(self, save_dir: str = 'recreated_plots'):
        """Recreate the overall polarization plot (emotional and political gap)."""
        os.makedirs(save_dir, exist_ok=True)

        plt.figure(figsize=(12, 6))
        df = pd.DataFrame(self.polarization_data['overall'])
        plt.plot(df['congress'], df['emotional_gap'], label='Emotional Gap')
        plt.plot(df['congress'], df['political_gap'], label='Political Gap')
        plt.title('Party Polarization Over Time')
        plt.xlabel('Congress')
        plt.ylabel('Party Gap')
        plt.ylim(-4, 4)
        plt.grid(True, alpha=0.3)
        plt.legend()
        plt.savefig(f"{save_dir}/overall_polarization.png")
        plt.close()

    def _issue_matrices(self, issues, congresses):
        """Build the (issue x congress) prevalence / emotional / political arrays.

        Cells stay 0 when no speech of that congress carries the issue's
        label, when the congress has no speeches, or when the issue has no
        label in ``metadata['issue_map']``.

        Returns:
            Tuple ``(prevalence, emotional, political)`` of float arrays with
            shape ``(len(issues), len(congresses))``.
        """
        shape = (len(issues), len(congresses))
        prevalence = np.zeros(shape)
        emotional = np.zeros(shape)
        political = np.zeros(shape)

        # First raw label that maps to each issue, resolved once up front.
        label_for_issue = {}
        for label, mapped_issue in self.metadata['issue_map'].items():
            label_for_issue.setdefault(mapped_issue, label)

        for j, congress in enumerate(congresses):
            df = self.raw_dfs[congress]
            if df.empty:
                continue
            for i, issue in enumerate(issues):
                issue_label = label_for_issue.get(issue)
                if issue_label is None:
                    # No label maps to this issue: leave the row at zero.
                    continue
                issue_mask = df['issues'].apply(lambda tagged: issue_label in tagged)
                issue_df = df[issue_mask]
                if not issue_df.empty:
                    prevalence[i, j] = len(issue_df) / len(df) * 100
                    emotional[i, j] = issue_df['emotional_intensity'].mean()
                    political[i, j] = issue_df['political_spectrum'].mean()

        return prevalence, emotional, political

    @staticmethod
    def _save_heatmap(matrix, congresses, issues, title: str, out_path: str,
                      cmap: str, fmt: str, **scale):
        """Render one annotated heatmap and save it to ``out_path``.

        ``scale`` is forwarded to ``sns.heatmap`` (used for ``vmin``/``vmax``).
        """
        plt.figure(figsize=(15, 8))
        sns.heatmap(matrix,
                    xticklabels=congresses,
                    yticklabels=issues,
                    cmap=cmap,
                    annot=True,
                    fmt=fmt,
                    **scale)
        plt.title(title)
        plt.xlabel('Congress')
        plt.ylabel('Issue')
        plt.tight_layout()
        plt.savefig(out_path)
        plt.close()

    def recreate_issue_heatmaps(self, save_dir: str = 'recreated_plots'):
        """Recreate the three issue heatmaps under ``<save_dir>/issue_heatmaps``."""
        heatmap_dir = os.path.join(save_dir, 'issue_heatmaps')
        os.makedirs(heatmap_dir, exist_ok=True)

        # Congress keys are strings in the JSON; order them numerically.
        congresses = sorted(self.raw_data.keys(), key=int)
        issues = list(self.metadata['valid_issues'])

        prevalence, emotional, political = self._issue_matrices(issues, congresses)

        self._save_heatmap(prevalence, congresses, issues,
                           'Issue Prevalence Over Time (%)',
                           f"{heatmap_dir}/issue_prevalence.png",
                           cmap='YlOrRd', fmt='.1f')
        self._save_heatmap(emotional, congresses, issues,
                           'Average Emotional Intensity by Issue Over Time',
                           f"{heatmap_dir}/issue_emotional.png",
                           cmap='Purples', fmt='.2f', vmin=1, vmax=5)
        self._save_heatmap(political, congresses, issues,
                           'Average Political Position by Issue Over Time',
                           f"{heatmap_dir}/issue_political.png",
                           cmap='RdBu_r', fmt='.2f', vmin=1, vmax=5)

    def recreate_all_plots(self, save_dir: str = 'recreated_plots'):
        """Run every ``recreate_*`` step in order, printing progress."""
        print("Recreating all plots...")

        # Create main directory
        os.makedirs(save_dir, exist_ok=True)

        # Recreate each type of plot
        print("1. Recreating party trends...")
        self.recreate_party_trends(save_dir)

        print("2. Recreating issue trends...")
        self.recreate_issue_trends(save_dir)

        print("3. Recreating polarization trends...")
        self.recreate_polarization_trends(save_dir)

        print("4. Recreating issue heatmaps...")
        self.recreate_issue_heatmaps(save_dir)

        print("\nAll plots have been recreated successfully!")
        print(f"Output directory: {save_dir}")

def main():
    """Load the exports from 'analysis_results' and regenerate every plot."""
    PlotRecreator(data_dir='analysis_results').recreate_all_plots(save_dir='recreated_plots')


if __name__ == "__main__":
    main()

Recreating all plots...
1. Recreating party trends...
2. Recreating issue trends...
3. Recreating polarization trends...
4. Recreating issue heatmaps...

All plots have been recreated successfully!
Output directory: recreated_plots


### graphs recreation with no standard deviation shading

In [11]:
import json
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from typing import Dict, List

class PlotRecreator:
    """Recreate the analysis figures without standard-deviation shading.

    Four files are read from ``data_dir`` on construction:
    ``raw_speech_data.json``, ``trend_data.json``, ``polarization_data.json``
    and ``metadata.json``. Every ``recreate_*`` method writes PNG files below
    its ``save_dir``.
    """

    # Party code -> legend label, in plotting order.
    _PARTY_LABELS = {'D': 'Democratic', 'R': 'Republican'}

    def __init__(self, data_dir: str = 'analysis_results', show_std: bool = False):
        """Load every data file found under ``data_dir``.

        Args:
            data_dir: Directory containing the four JSON exports.
            show_std: When True, trend plots additionally shade a band of
                +/- one ``*_std`` around each ``*_avg`` line. Defaults to
                False, i.e. plain lines only.
        """
        self.data_dir = data_dir
        self.show_std = show_std

        # Load all data files
        self.raw_data = self._load_json('raw_speech_data.json')
        self.trend_data = self._load_json('trend_data.json')
        self.polarization_data = self._load_json('polarization_data.json')
        self.metadata = self._load_json('metadata.json')

        # One DataFrame per congress, built once and reused by the heatmaps.
        self.raw_dfs = {
            congress: pd.DataFrame(records)
            for congress, records in self.raw_data.items()
        }

    def _load_json(self, filename: str):
        """Return the parsed content of ``filename`` inside ``self.data_dir``."""
        with open(os.path.join(self.data_dir, filename), 'r') as f:
            return json.load(f)

    def _save_trend_figure(self, records_by_party, metric: str, title: str,
                           ylabel: str, out_path: str, grid_alpha=None):
        """Draw one average-per-congress line per party and save the figure.

        Args:
            records_by_party: Mapping of party code to a list of records with
                ``congress`` and ``{metric}_avg`` fields (plus
                ``{metric}_std`` when shading is enabled). Parties absent
                from it are skipped.
            metric: Column prefix, ``'emotional'`` or ``'political'``.
            title: Figure title.
            ylabel: Y-axis label.
            out_path: Destination PNG path.
            grid_alpha: Grid transparency; ``None`` keeps matplotlib's default.
        """
        plt.figure(figsize=(12, 6))
        for party, label in self._PARTY_LABELS.items():
            if party not in records_by_party:
                continue
            df = pd.DataFrame(records_by_party[party])
            avg = df[f'{metric}_avg']
            plt.plot(df['congress'], avg, label=label)
            if self.show_std:
                std = df[f'{metric}_std']
                plt.fill_between(df['congress'], avg - std, avg + std, alpha=0.2)

        plt.title(title)
        plt.xlabel('Congress')
        plt.ylabel(ylabel)
        plt.ylim(1, 5)
        if grid_alpha is None:
            plt.grid(True)
        else:
            plt.grid(True, alpha=grid_alpha)
        plt.legend()
        plt.savefig(out_path)
        plt.close()

    def recreate_party_trends(self, save_dir: str = 'recreated_plots'):
        """Recreate the two party-level trend plots (emotional, political)."""
        os.makedirs(save_dir, exist_ok=True)

        by_party = self.trend_data['by_party']
        # Both parties are required here; a missing one raises KeyError.
        records = {party: by_party[party] for party in self._PARTY_LABELS}

        self._save_trend_figure(
            records, 'emotional',
            'Emotional Intensity by Party Over Time',
            'Average Emotional Intensity',
            f"{save_dir}/emotional_intensity_by_party.png",
            grid_alpha=0.3)
        self._save_trend_figure(
            records, 'political',
            'Political Spectrum Position by Party Over Time',
            'Average Political Spectrum Position',
            f"{save_dir}/political_spectrum_by_party.png",
            grid_alpha=0.3)

    def recreate_issue_trends(self, save_dir: str = 'recreated_plots'):
        """Recreate per-issue trend plots under ``<save_dir>/issues/<issue>/``."""
        base_dir = os.path.join(save_dir, 'issues')
        os.makedirs(base_dir, exist_ok=True)

        by_party_issue = self.trend_data['by_party_issue']
        for issue in self.metadata['valid_issues']:
            issue_dir = os.path.join(base_dir, issue.lower().replace(' ', '_'))
            os.makedirs(issue_dir, exist_ok=True)

            # Issues or parties without trend data simply produce no line.
            records = by_party_issue.get(issue, {})

            self._save_trend_figure(
                records, 'emotional',
                f'Emotional Intensity Over Time: {issue}',
                'Average Emotional Intensity',
                f"{issue_dir}/emotional_intensity.png")
            self._save_trend_figure(
                records, 'political',
                f'Political Spectrum Position Over Time: {issue}',
                'Average Political Spectrum Position',
                f"{issue_dir}/political_spectrum.png")

    def recreate_polarization_trends(self, save_dir: str = 'recreated_plots'):
        """Recreate the overall polarization plot (emotional and political gap)."""
        os.makedirs(save_dir, exist_ok=True)

        plt.figure(figsize=(12, 6))
        df = pd.DataFrame(self.polarization_data['overall'])
        plt.plot(df['congress'], df['emotional_gap'], label='Emotional Gap')
        plt.plot(df['congress'], df['political_gap'], label='Political Gap')
        plt.title('Party Polarization Over Time')
        plt.xlabel('Congress')
        plt.ylabel('Party Gap')
        plt.ylim(-4, 4)
        plt.grid(True, alpha=0.3)
        plt.legend()
        plt.savefig(f"{save_dir}/overall_polarization.png")
        plt.close()

    def _issue_matrices(self, issues, congresses):
        """Build the (issue x congress) prevalence / emotional / political arrays.

        Cells stay 0 when no speech of that congress carries the issue's
        label, when the congress has no speeches, or when the issue has no
        label in ``metadata['issue_map']``.

        Returns:
            Tuple ``(prevalence, emotional, political)`` of float arrays with
            shape ``(len(issues), len(congresses))``.
        """
        shape = (len(issues), len(congresses))
        prevalence = np.zeros(shape)
        emotional = np.zeros(shape)
        political = np.zeros(shape)

        # First raw label that maps to each issue, resolved once up front.
        label_for_issue = {}
        for label, mapped_issue in self.metadata['issue_map'].items():
            label_for_issue.setdefault(mapped_issue, label)

        for j, congress in enumerate(congresses):
            df = self.raw_dfs[congress]
            if df.empty:
                continue
            for i, issue in enumerate(issues):
                issue_label = label_for_issue.get(issue)
                if issue_label is None:
                    # No label maps to this issue: leave the row at zero.
                    continue
                issue_mask = df['issues'].apply(lambda tagged: issue_label in tagged)
                issue_df = df[issue_mask]
                if not issue_df.empty:
                    prevalence[i, j] = len(issue_df) / len(df) * 100
                    emotional[i, j] = issue_df['emotional_intensity'].mean()
                    political[i, j] = issue_df['political_spectrum'].mean()

        return prevalence, emotional, political

    @staticmethod
    def _save_heatmap(matrix, congresses, issues, title: str, out_path: str,
                      cmap: str, fmt: str, **scale):
        """Render one annotated heatmap and save it to ``out_path``.

        ``scale`` is forwarded to ``sns.heatmap`` (used for ``vmin``/``vmax``).
        """
        plt.figure(figsize=(15, 8))
        sns.heatmap(matrix,
                    xticklabels=congresses,
                    yticklabels=issues,
                    cmap=cmap,
                    annot=True,
                    fmt=fmt,
                    **scale)
        plt.title(title)
        plt.xlabel('Congress')
        plt.ylabel('Issue')
        plt.tight_layout()
        plt.savefig(out_path)
        plt.close()

    def recreate_issue_heatmaps(self, save_dir: str = 'recreated_plots'):
        """Recreate the three issue heatmaps under ``<save_dir>/issue_heatmaps``."""
        heatmap_dir = os.path.join(save_dir, 'issue_heatmaps')
        os.makedirs(heatmap_dir, exist_ok=True)

        # Congress keys are strings in the JSON; order them numerically.
        congresses = sorted(self.raw_data.keys(), key=int)
        issues = list(self.metadata['valid_issues'])

        prevalence, emotional, political = self._issue_matrices(issues, congresses)

        self._save_heatmap(prevalence, congresses, issues,
                           'Issue Prevalence Over Time (%)',
                           f"{heatmap_dir}/issue_prevalence.png",
                           cmap='YlOrRd', fmt='.1f')
        self._save_heatmap(emotional, congresses, issues,
                           'Average Emotional Intensity by Issue Over Time',
                           f"{heatmap_dir}/issue_emotional.png",
                           cmap='RdBu_r', fmt='.2f', vmin=1, vmax=5)
        self._save_heatmap(political, congresses, issues,
                           'Average Political Position by Issue Over Time',
                           f"{heatmap_dir}/issue_political.png",
                           cmap='RdBu_r', fmt='.2f', vmin=1, vmax=5)

    def recreate_all_plots(self, save_dir: str = 'recreated_plots'):
        """Run every ``recreate_*`` step in order, printing progress."""
        print("Recreating all plots...")

        # Create main directory
        os.makedirs(save_dir, exist_ok=True)

        # Recreate each type of plot
        print("1. Recreating party trends...")
        self.recreate_party_trends(save_dir)

        print("2. Recreating issue trends...")
        self.recreate_issue_trends(save_dir)

        print("3. Recreating polarization trends...")
        self.recreate_polarization_trends(save_dir)

        print("4. Recreating issue heatmaps...")
        self.recreate_issue_heatmaps(save_dir)

        print("\nAll plots have been recreated successfully!")
        print(f"Output directory: {save_dir}")

def main():
    """Build a recreator over 'analysis_results' and write all plots to 'recreated_plots'."""
    plot_recreator = PlotRecreator(data_dir='analysis_results')
    plot_recreator.recreate_all_plots(save_dir='recreated_plots')


if __name__ == "__main__":
    main()

Recreating all plots...
1. Recreating party trends...
2. Recreating issue trends...
3. Recreating polarization trends...
4. Recreating issue heatmaps...

All plots have been recreated successfully!
Output directory: recreated_plots


In [None]:
import json
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
import networkx as nx
from typing import Dict, List

class PlotRecreator:
    def __init__(self, data_dir: str = 'analysis_results'):
        """Initialize the plot recreator with the data directory"""
        self.data_dir = data_dir
        
        # Load all data files
        with open(os.path.join(data_dir, 'raw_speech_data.json'), 'r') as f:
            self.raw_data = json.load(f)
            
        with open(os.path.join(data_dir, 'trend_data.json'), 'r') as f:
            self.trend_data = json.load(f)
            
        with open(os.path.join(data_dir, 'polarization_data.json'), 'r') as f:
            self.polarization_data = json.load(f)
            
        with open(os.path.join(data_dir, 'metadata.json'), 'r') as f:
            self.metadata = json.load(f)
        
        # Convert raw data to DataFrames
        self.raw_dfs = {
            congress: pd.DataFrame(data) 
            for congress, data in self.raw_data.items()
        }

    def plot_issue_evolution_matrix(self, save_dir: str = 'enhanced_plots'):
        """Create a multi-panel visualization showing issue evolution across all dimensions"""
        os.makedirs(save_dir, exist_ok=True)
        
        # For each issue, create a 2D scatter plot over time
        for issue in self.metadata['valid_issues']:
            fig, ax = plt.subplots(figsize=(15, 10))
            
            # Get data for both parties
            for party in ['D', 'R']:
                if issue in self.trend_data['by_party_issue'] and \
                   party in self.trend_data['by_party_issue'][issue]:
                    df = pd.DataFrame(self.trend_data['by_party_issue'][issue][party])
                    
                    # Create scatter plot where:
                    # X-axis: political_avg
                    # Y-axis: emotional_avg
                    # Color: congress (for time evolution)
                    # Size: count (speech frequency)
                    scatter = ax.scatter(
                        df['political_avg'],
                        df['emotional_avg'],
                        c=df['congress'],
                        s=df['count']*5,  # Scale size for visibility
                        alpha=0.6,
                        cmap='coolwarm' if party == 'D' else 'RdYlBu',
                        label=f"{'Democratic' if party == 'D' else 'Republican'}"
                    )
                    
                    # Add arrows to show direction of movement over time
                    for i in range(len(df)-1):
                        ax.arrow(
                            df['political_avg'].iloc[i],
                            df['emotional_avg'].iloc[i],
                            df['political_avg'].iloc[i+1] - df['political_avg'].iloc[i],
                            df['emotional_avg'].iloc[i+1] - df['emotional_avg'].iloc[i],
                            head_width=0.05,
                            head_length=0.05,
                            fc='gray',
                            ec='gray',
                            alpha=0.3
                        )
            
            plt.colorbar(scatter, label='Congress')
            plt.title(f'Evolution of {issue} Framing (1945-2016)')
            plt.xlabel('Political Spectrum Position (Left → Right)')
            plt.ylabel('Emotional Intensity (Low → High)')
            plt.grid(True, alpha=0.3)
            plt.legend()
            plt.savefig(f"{save_dir}/{issue.lower().replace(' ', '_')}_evolution.png")
            plt.close()

    def plot_party_polarization_trends(self, save_dir: str = 'enhanced_plots'):
        """Create visualization showing political spectrum polarization over time"""
        os.makedirs(save_dir, exist_ok=True)
        
        # Calculate political spectrum gap between parties for each issue over time
        polarization_data = {}
        for issue in self.metadata['valid_issues']:
            if issue in self.trend_data['by_party_issue']:
                dem_data = pd.DataFrame(self.trend_data['by_party_issue'][issue]['D'])
                rep_data = pd.DataFrame(self.trend_data['by_party_issue'][issue]['R'])
                
                # Calculate absolute difference in political spectrum positions
                distances = []
                congresses = sorted(set(dem_data['congress']) & set(rep_data['congress']))
                for congress in congresses:
                    dem_point = dem_data[dem_data['congress'] == congress]
                    rep_point = rep_data[rep_data['congress'] == congress]
                    
                    if not dem_point.empty and not rep_point.empty:
                        distance = abs(rep_point['political_avg'].iloc[0] - dem_point['political_avg'].iloc[0])
                        distances.append((congress, distance))
                
                polarization_data[issue] = distances
        
        # Create streamgraph
        fig, ax = plt.subplots(figsize=(15, 8))
        
        # Prepare data for streamgraph
        issues = list(polarization_data.keys())
        congresses = sorted(set(c for d in polarization_data.values() for c, _ in d))
        data = np.zeros((len(issues), len(congresses)))
        
        for i, issue in enumerate(issues):
            for j, congress in enumerate(congresses):
                matching = [d for c, d in polarization_data[issue] if c == congress]
                if matching:
                    data[i, j] = matching[0]
        
        # Create streamgraph
        ax.stackplot(congresses, data, labels=issues, baseline='sym')
        
        plt.title('Political Spectrum Polarization by Issue Over Time')
        plt.xlabel('Congress')
        plt.ylabel('Political Spectrum Gap (R-D)')
        plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
        plt.tight_layout()
        plt.savefig(f"{save_dir}/political_polarization_streamgraph.png")
        plt.close()

    def plot_issue_network(self, save_dir: str = 'enhanced_plots'):
        """Create network visualization showing issue relationships and shifts"""
        try:
            # Create graph
            G = nx.Graph()
            
            # Add nodes for each issue
            for issue in self.metadata['valid_issues']:
                if issue in self.trend_data['by_issue']:
                    df = pd.DataFrame(self.trend_data['by_issue'][issue])
                    
                    # Calculate average position and change over time
                    start_pos = np.array([df['political_avg'].iloc[0], df['emotional_avg'].iloc[0]])
                    end_pos = np.array([df['political_avg'].iloc[-1], df['emotional_avg'].iloc[-1]])
                    shift_magnitude = np.linalg.norm(end_pos - start_pos)
                    
                    G.add_node(issue, shift=shift_magnitude, pos=end_pos)
            
            # Add edges between issues that shifted similarly
            for issue1 in G.nodes():
                for issue2 in G.nodes():
                    if issue1 < issue2:
                        shift1 = G.nodes[issue1]['shift']
                        shift2 = G.nodes[issue2]['shift']
                        if abs(shift1 - shift2) < 0.5:  # Threshold for similarity
                            G.add_edge(issue1, issue2, weight=1/(abs(shift1 - shift2) + 0.1))
            
            # Draw network
            plt.figure(figsize=(15, 15))
            pos = nx.spring_layout(G)
            
            # Draw nodes
            node_sizes = [G.nodes[node]['shift'] * 1000 for node in G.nodes()]
            nx.draw_networkx_nodes(G, pos, node_size=node_sizes, node_color=node_sizes, 
                                 cmap='viridis', alpha=0.7)
            
            # Draw edges
            edge_weights = [G[u][v]['weight'] for u, v in G.edges()]
            nx.draw_networkx_edges(G, pos, width=edge_weights, alpha=0.4)
            
            # Add labels
            nx.draw_networkx_labels(G, pos)
            
            plt.title('Issue Relationship Network\n(Node size = magnitude of shift, Edge weight = similarity of shift)')
            plt.axis('off')
            plt.savefig(f"{save_dir}/issue_network.png")
            plt.close()
        except ImportError:
            print("NetworkX required for network visualization")
    
    # Original visualization methods
    def recreate_party_trends(self, save_dir: str = 'recreated_plots'):
        """Recreate party-level trend plots"""
        os.makedirs(save_dir, exist_ok=True)
        
        # Emotional Intensity by Party
        plt.figure(figsize=(12, 6))
        for party in ['D', 'R']:
            df = pd.DataFrame(self.trend_data['by_party'][party])
            plt.plot(df['congress'], df['emotional_avg'], 
                    label=f"{'Democratic' if party == 'D' else 'Republican'}")
        
        plt.title('Emotional Intensity by Party Over Time')
        plt.xlabel('Congress')
        plt.ylabel('Average Emotional Intensity')
        plt.ylim(1, 5)
        plt.grid(True, alpha=0.3)
        plt.legend()
        plt.savefig(f"{save_dir}/emotional_intensity_by_party.png")
        plt.close()
        
        plt.figure(figsize=(12, 6))
        for party in ['D', 'R']:
            df = pd.DataFrame(self.trend_data['by_party'][party])
            plt.plot(df['congress'], df['political_avg'],
                    label=f"{'Democratic' if party == 'D' else 'Republican'}")

        plt.title('Political Spectrum Position by Party Over Time', fontsize=16)
        plt.xlabel('Congress', fontsize=16)
        plt.ylabel('Average Political Spectrum Position', fontsize=16)
        plt.tick_params(axis='both', which='major', labelsize=16)
        plt.ylim(1, 5)
        plt.grid(True, alpha=0.3)
        plt.legend(fontsize=12)
        plt.savefig(f"{save_dir}/political_spectrum_by_party.png")
        plt.close()
    
    def recreate_issue_trends(self, save_dir: str = 'recreated_plots'):
        """Recreate issue-level trend plots"""
        base_dir = os.path.join(save_dir, 'issues')
        os.makedirs(base_dir, exist_ok=True)
        
        for issue in self.metadata['valid_issues']:
            issue_dir = os.path.join(base_dir, issue.lower().replace(' ', '_'))
            os.makedirs(issue_dir, exist_ok=True)
            
            # Emotional Intensity
            plt.figure(figsize=(12, 6))
            for party in ['D', 'R']:
                if issue in self.trend_data['by_party_issue'] and \
                party in self.trend_data['by_party_issue'][issue]:
                    df = pd.DataFrame(self.trend_data['by_party_issue'][issue][party])
                    plt.plot(df['congress'], df['emotional_avg'],
                            label=f"{'Democratic' if party == 'D' else 'Republican'}")
            
            plt.title(f'Emotional Intensity Over Time: {issue}')
            plt.xlabel('Congress')
            plt.ylabel('Average Emotional Intensity')
            plt.ylim(1, 5)
            plt.legend()
            plt.grid(True)
            plt.savefig(f"{issue_dir}/emotional_intensity.png")
            plt.close()
            
            # Political Spectrum
            plt.figure(figsize=(12, 6))
            for party in ['D', 'R']:
                if issue in self.trend_data['by_party_issue'] and \
                party in self.trend_data['by_party_issue'][issue]:
                    df = pd.DataFrame(self.trend_data['by_party_issue'][issue][party])
                    plt.plot(df['congress'], df['political_avg'],
                            label=f"{'Democratic' if party == 'D' else 'Republican'}")
            
            plt.title(f'Political Spectrum Position Over Time: {issue}')
            plt.xlabel('Congress')
            plt.ylabel('Average Political Spectrum Position')
            plt.ylim(1, 5)
            plt.legend()
            plt.grid(True)
            plt.savefig(f"{issue_dir}/political_spectrum.png")
            plt.close()
    
    def recreate_polarization_trends(self, save_dir: str = 'recreated_plots'):
        """Recreate polarization trend plots"""
        os.makedirs(save_dir, exist_ok=True)
        
        plt.figure(figsize=(12, 6))
        df = pd.DataFrame(self.polarization_data['overall'])
        plt.plot(df['congress'], df['emotional_gap'], label='Emotional Gap')
        plt.plot(df['congress'], df['political_gap'], label='Political Gap')
        plt.title('Party Polarization Over Time')
        plt.xlabel('Congress')
        plt.ylabel('Party Gap')
        plt.ylim(-4, 4)
        plt.grid(True, alpha=0.3)
        plt.legend()
        plt.savefig(f"{save_dir}/overall_polarization.png")
        plt.close()
    
    def recreate_issue_heatmaps(self, save_dir: str = 'recreated_plots'):
        """Recreate issue heatmaps"""
        heatmap_dir = os.path.join(save_dir, 'issue_heatmaps')
        os.makedirs(heatmap_dir, exist_ok=True)
        
        # Create shortened labels dictionary
        SHORT_LABELS = {
            'Immigration and Border Policy': 'Immigration',
            'Health and Social Services': 'Healthcare',
            'Environment and Energy': 'Environment',
            'Justice and Civil Rights': 'Justice',
            'Defense and Security': 'Defense',
            'Economy and Jobs': 'Economy',
            'Budget and Fiscal Responsibility': 'Budget',
            'Education and Innovation': 'Education',
            'Infrastructure and Transportation': 'Infrastructure'
        }
        
        # Get all congresses and issues
        congresses = sorted(self.raw_data.keys(), key=int)
        issues = list(self.metadata['valid_issues'])
        
        # Initialize matrices
        prevalence_matrix = np.zeros((len(issues), len(congresses)))
        emotional_matrix = np.zeros((len(issues), len(congresses)))
        political_matrix = np.zeros((len(issues), len(congresses)))
        
        # Fill matrices using raw data
        for i, issue in enumerate(issues):
            for j, congress in enumerate(congresses):
                df = pd.DataFrame(self.raw_data[congress])
                # Find the LABEL that maps to this issue
                issue_label = [k for k, v in self.metadata['issue_map'].items() if v == issue][0]
                issue_mask = df['issues'].apply(lambda x: issue_label in x)
                issue_df = df[issue_mask]
                
                if not issue_df.empty:
                    prevalence_matrix[i, j] = len(issue_df) / len(df) * 100
                    emotional_matrix[i, j] = issue_df['emotional_intensity'].mean()
                    political_matrix[i, j] = issue_df['political_spectrum'].mean()
        
        # Create shortened issue labels
        short_issues = [SHORT_LABELS[issue] for issue in issues]
        
        # Create congress labels for every 5th congress
        congress_numbers = [int(x) for x in congresses]  # Convert to integers
        congress_labels = [str(x) if int(x) % 5 == 0 else '' for x in congresses]
        
        # Plot emotional heatmap with modified labels
        plt.figure(figsize=(15, 8))
        sns.heatmap(emotional_matrix,
                    xticklabels=congress_labels,
                    yticklabels=short_issues,
                    cmap='Purples')
        
        plt.title('Average Emotional Intensity by Issue Over Time', fontsize=20)
        plt.xlabel('Congress', fontsize=20)
        plt.ylabel('Issue', fontsize=20)
        plt.xticks(fontsize=20)
        plt.yticks(fontsize=20, rotation=0)
        
        plt.tight_layout()
        plt.savefig(f"{heatmap_dir}/issue_emotional.png")
        plt.close()
        
        # Plot heatmaps
        plt.figure(figsize=(15, 8))
        sns.heatmap(prevalence_matrix,
                    xticklabels=congresses,
                    yticklabels=issues,
                    cmap='YlOrRd')
        plt.title('Issue Prevalence Over Time (%)')
        plt.xlabel('Congress')
        plt.ylabel('Issue')
        plt.tight_layout()
        plt.savefig(f"{heatmap_dir}/issue_prevalence.png")
        plt.close()
        
        plt.figure(figsize=(15, 8))
        sns.heatmap(political_matrix,
                    xticklabels=congresses,
                    yticklabels=issues,
                    cmap='RdBu_r',
                    vmin=1, vmax=5)
        plt.title('Average Political Position by Issue Over Time')
        plt.xlabel('Congress')
        plt.ylabel('Issue')
        plt.tight_layout()
        plt.savefig(f"{heatmap_dir}/issue_political.png")
        plt.close()
    
    def recreate_all_plots(self, save_dir: str = 'recreated_plots'):
        """Recreate all plots including the new enhanced visualizations"""
        print("Recreating all plots...")
        
        # Create main directory
        os.makedirs(save_dir, exist_ok=True)
        
        # Original plots
        print("1. Recreating party trends...")
        self.recreate_party_trends(save_dir)
        
        print("2. Recreating issue trends...")
        self.recreate_issue_trends(save_dir)
        
        print("3. Recreating polarization trends...")
        self.recreate_polarization_trends(save_dir)
        
        print("4. Recreating issue heatmaps...")
        self.recreate_issue_heatmaps(save_dir)
        
        # Enhanced plots
        print("5. Creating issue evolution matrix plots...")
        self.plot_issue_evolution_matrix(save_dir)
        
        print("6. Creating party polarization streamgraph...")
        self.plot_party_polarization_trends(save_dir)
        
        print("7. Creating issue network visualization...")
        self.plot_issue_network(save_dir)
        
        print("\nAll plots have been recreated successfully!")
        print(f"Output directory: {save_dir}")

    def calculate_frameshifting(self, start_congress=79, end_congress=114):
        """Calculate frameshifting statistics for each issue and party."""
        
        # Define the early and late period ranges
        # First 5 congresses in the range [79..83], last 5 in [110..114]
        early_period = range(start_congress, start_congress + 5)
        late_period = range(end_congress - 4, end_congress + 1)

        results = {
            'by_issue_party': {},
            'aggregate': {}
        }
        
        # Gather lists for aggregate calculations across all issues
        all_political_shifts = {'D': [], 'R': []}
        all_emotional_shifts = {'D': [], 'R': []}
        
        issues = self.metadata['valid_issues']
        parties = ['D', 'R']
        
        for issue in issues:
            results['by_issue_party'][issue] = {}
            
            # Make sure issue data is present
            if issue not in self.trend_data['by_party_issue']:
                continue
            
            for party in parties:
                if party not in self.trend_data['by_party_issue'][issue]:
                    continue
                    
                df = pd.DataFrame(self.trend_data['by_party_issue'][issue][party])
                
                # Filter to desired congress range and sort
                df = df[(df['congress'] >= start_congress) & (df['congress'] <= end_congress)].copy()
                df.sort_values(by='congress', inplace=True)
                
                if df.empty:
                    continue
                
                # Compute early averages
                early_df = df[df['congress'].isin(early_period)]
                late_df = df[df['congress'].isin(late_period)]
                
                if early_df.empty or late_df.empty:
                    # If we don't have full coverage, we can skip
                    continue
                
                early_political_avg = early_df['political_avg'].mean()
                early_emotional_avg = early_df['emotional_avg'].mean()
                late_political_avg = late_df['political_avg'].mean()
                late_emotional_avg = late_df['emotional_avg'].mean()
                
                # Compute shifts
                political_shift = late_political_avg - early_political_avg
                emotional_shift = late_emotional_avg - early_emotional_avg
                
                # Store results
                issue_party_key = f"{issue}_{party}"
                results['by_issue_party'][issue][party] = {
                    'early_political_avg': early_political_avg,
                    'late_political_avg': late_political_avg,
                    'political_shift': political_shift,
                    'early_emotional_avg': early_emotional_avg,
                    'late_emotional_avg': late_emotional_avg,
                    'emotional_shift': emotional_shift,
                }
                
                # Add to aggregate
                all_political_shifts[party].append(political_shift)
                all_emotional_shifts[party].append(emotional_shift)
                
                # Find largest successive congress delta for political and emotional averages
                # look at consecutive entries in df
                df_sorted = df.sort_values(by='congress')
                df_sorted['political_delta'] = df_sorted['political_avg'].diff()
                df_sorted['emotional_delta'] = df_sorted['emotional_avg'].diff()
                
                # Identify the largest absolute changes
                max_political_delta = df_sorted['political_delta'].abs().max()
                max_emotional_delta = df_sorted['emotional_delta'].abs().max()
                
                # Which congresses caused these max deltas?
                max_pol_delta_row = df_sorted.iloc[df_sorted['political_delta'].abs().idxmax()]
                max_emot_delta_row = df_sorted.iloc[df_sorted['emotional_delta'].abs().idxmax()]
                
                # Because delta is between successive congresses, we need to find the pair
                # The congress associated with delta is the "current" row; 
                # The previous congress would be previous row
                pol_indices = df_sorted.index[df_sorted['political_delta'].abs() == max_political_delta]
                emot_indices = df_sorted.index[df_sorted['emotional_delta'].abs() == max_emotional_delta]
                
                # For simplicity, take the first occurrence if multiple maxes
                pol_idx = pol_indices[0] if len(pol_indices) > 0 else None
                emot_idx = emot_indices[0] if len(emot_indices) > 0 else None
                
                # Retrieve congress pair for political
                if pol_idx is not None:
                    pol_congress_current = df_sorted.loc[pol_idx, 'congress']
                    # The previous congress's row should be one before pol_idx
                    # But we need to ensure pol_idx - 1 is in the dataset
                    pol_prev_congress = df_sorted.iloc[df_sorted.index.get_loc(pol_idx)-1]['congress'] if df_sorted.index.get_loc(pol_idx)-1 >= 0 else None
                else:
                    pol_congress_current, pol_prev_congress = None, None
                
                # Retrieve congress pair for emotional
                if emot_idx is not None:
                    emot_congress_current = df_sorted.loc[emot_idx, 'congress']
                    emot_prev_congress = df_sorted.iloc[df_sorted.index.get_loc(emot_idx)-1]['congress'] if df_sorted.index.get_loc(emot_idx)-1 >= 0 else None
                else:
                    emot_congress_current, emot_prev_congress = None, None
                
                # Store these deltas
                results['by_issue_party'][issue][party].update({
                    'max_political_delta': max_political_delta,
                    'max_political_delta_congresses': (pol_prev_congress, pol_congress_current),
                    'max_emotional_delta': max_emotional_delta,
                    'max_emotional_delta_congresses': (emot_prev_congress, emot_congress_current)
                })
        
        # Compute aggregate results across all issues
        for party in parties:
            if all_political_shifts[party]:
                avg_political_shift = np.mean(all_political_shifts[party])
            else:
                avg_political_shift = None
            
            if all_emotional_shifts[party]:
                avg_emotional_shift = np.mean(all_emotional_shifts[party])
            else:
                avg_emotional_shift = None
            
            results['aggregate'][party] = {
                'avg_political_shift': avg_political_shift,
                'avg_emotional_shift': avg_emotional_shift
            }

        print("Frameshifting Results:")
        print("Aggregates:")
        for party in parties:
            print(f"  {party}: Avg Political Shift = {results['aggregate'][party]['avg_political_shift']}, Avg Emotional Shift = {results['aggregate'][party]['avg_emotional_shift']}")
        
        print("\nBy Issue and Party:")
        for issue in results['by_issue_party']:
            for party in parties:
                if party in results['by_issue_party'][issue]:
                    vals = results['by_issue_party'][issue][party]
                    print(f"Issue: {issue}, Party: {party}")
                    print(f"  Early Political Avg: {vals['early_political_avg']:.2f}, Late Political Avg: {vals['late_political_avg']:.2f}, Shift: {vals['political_shift']:.2f}")
                    print(f"  Early Emotional Avg: {vals['early_emotional_avg']:.2f}, Late Emotional Avg: {vals['late_emotional_avg']:.2f}, Shift: {vals['emotional_shift']:.2f}")
                    print(f"  Max Political Delta: {vals['max_political_delta']:.2f} (between Congresses {vals['max_political_delta_congresses']})")
                    print(f"  Max Emotional Delta: {vals['max_emotional_delta']:.2f} (between Congresses {vals['max_emotional_delta_congresses']})\n")
        
        return results


In [54]:
# Driver cell: build a PlotRecreator over the 'analysis_results' directory,
# regenerate every figure into 'enhanced_plots', then compute the frameshifting
# statistics (calculate_frameshifting also prints its summary, shown below).
recreator = PlotRecreator(data_dir='analysis_results')
recreator.recreate_all_plots(save_dir='enhanced_plots')
frameshift_results = recreator.calculate_frameshifting()

Recreating all plots...
1. Recreating party trends...
2. Recreating issue trends...
3. Recreating polarization trends...
4. Recreating issue heatmaps...
5. Creating issue evolution matrix plots...
6. Creating party polarization streamgraph...
7. Creating issue network visualization...

All plots have been recreated successfully!
Output directory: enhanced_plots
Frameshifting Results:
Aggregates:
  D: Avg Political Shift = -0.5678000095849899, Avg Emotional Shift = 0.6979176835039582
  R: Avg Political Shift = 0.043799793274794316, Avg Emotional Shift = 0.6691645371998427

By Issue and Party:
Issue: Immigration and Border Policy, Party: D
  Early Political Avg: 3.00, Late Political Avg: 2.37, Shift: -0.63
  Early Emotional Avg: 2.56, Late Emotional Avg: 3.16, Shift: 0.60
  Max Political Delta: 1.50 (between Congresses (np.float64(93.0), np.int64(94)))
  Max Emotional Delta: 1.20 (between Congresses (np.float64(92.0), np.int64(93)))

Issue: Immigration and Border Policy, Party: R
  Early