In [None]:
# Remove the PyPI "whisper" distribution (a different project that provides the
# same top-level `whisper` module) together with any previously installed
# openai-whisper build, then install OpenAI Whisper from its repository.
# `%pip` is used instead of `!pip` so the install targets the running kernel.
%pip uninstall -y whisper openai-whisper
%pip install git+https://github.com/openai/whisper.git


[0mCollecting openai-whisper
  Downloading openai-whisper-20240930.tar.gz (800 kB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m800.5/800.5 kB[0m [31m11.0 MB/s[0m eta [36m0:00:00[0m
[?25h  Installing build dependencies ... [?25l[?25hdone
  Getting requirements to build wheel ... [?25l[?25hdone
  Preparing metadata (pyproject.toml) ... [?25l[?25hdone
Collecting tiktoken (from openai-whisper)
  Downloading tiktoken-0.8.0-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (6.6 kB)
Collecting nvidia-cuda-nvrtc-cu12==12.4.127 (from torch->openai-whisper)
  Downloading nvidia_cuda_nvrtc_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-runtime-cu12==12.4.127 (from torch->openai-whisper)
  Downloading nvidia_cuda_runtime_cu12-12.4.127-py3-none-manylinux2014_x86_64.whl.metadata (1.5 kB)
Collecting nvidia-cuda-cupti-cu12==12.4.127 (from torch->openai-whisper)
  Downloading nvidia_cuda_cupti_cu12-12.4.127-py3

In [None]:
# Install the remaining dependencies once each. The PyPI "whisper" distribution
# is deliberately NOT installed: it is a different project from openai-whisper
# but provides the same top-level `whisper` module.
%pip install torch torchaudio speechbrain
%pip install -U openai openai-whisper
%pip show openai-whisper

Collecting speechbrain
  Downloading speechbrain-1.0.2-py3-none-any.whl.metadata (23 kB)
Collecting hyperpyyaml (from speechbrain)
  Downloading HyperPyYAML-1.2.2-py3-none-any.whl.metadata (7.6 kB)
Collecting ruamel.yaml>=0.17.28 (from hyperpyyaml->speechbrain)
  Downloading ruamel.yaml-0.18.10-py3-none-any.whl.metadata (23 kB)
Collecting ruamel.yaml.clib>=0.2.7 (from ruamel.yaml>=0.17.28->hyperpyyaml->speechbrain)
  Downloading ruamel.yaml.clib-0.2.12-cp311-cp311-manylinux_2_17_x86_64.manylinux2014_x86_64.whl.metadata (2.7 kB)
Downloading speechbrain-1.0.2-py3-none-any.whl (824 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m824.8/824.8 kB[0m [31m14.8 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading HyperPyYAML-1.2.2-py3-none-any.whl (16 kB)
Downloading ruamel.yaml-0.18.10-py3-none-any.whl (117 kB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m117.7/117.7 kB[0m [31m8.9 MB/s[0m eta [36m0:00:00[0m
[?25hDownloading ruamel.yaml.clib-0.2.12-cp311-cp

In [None]:
import json
import logging
import os
from datetime import datetime
from typing import Any, Dict, Optional

import numpy as np
import torch
import torchaudio
import torchaudio.transforms as T
import whisper
from openai import OpenAI

In [None]:
class StressAnalyzer:
    """Estimate a speaker's stress level from an audio recording.

    The pipeline has three stages:

    1. Heuristic "emotion" scores computed from mel-spectrogram statistics.
    2. A Whisper transcription of the recording.
    3. A chat-completion request that turns the scores and the transcript
       into a JSON stress assessment.
    """

    def __init__(self, openai_api_key: str):
        """Initialize the Stress Analyzer with required models and configurations.

        Args:
            openai_api_key: API key handed to the OpenAI client.

        Note:
            Calls ``logging.basicConfig`` (a no-op when the root logger is
            already configured) and loads the Whisper "base" model.
        """
        logging.basicConfig(level=logging.INFO)
        self.logger = logging.getLogger(__name__)

        # Initialize OpenAI client with new API
        self.client = OpenAI(api_key=openai_api_key)

        self.logger.info("Loading Whisper model...")
        self.whisper_model = whisper.load_model("base")

        # Audio processing parameters
        self.sample_rate = 16000
        self.n_mels = 128
        self.n_fft = 2048
        self.hop_length = 512

        # Initialize audio transforms
        self.mel_transform = T.MelSpectrogram(
            sample_rate=self.sample_rate,
            n_fft=self.n_fft,
            hop_length=self.hop_length,
            n_mels=self.n_mels,
            normalized=True
        )

        self.logger.info("Initialization complete!")

    def _extract_audio_features(self, waveform: torch.Tensor) -> Dict[str, float]:
        """Extract audio features and map them to emotion scores.

        Args:
            waveform: Audio samples. ``analyze_stress`` passes a mono tensor
                resampled to ``self.sample_rate``; presumably shaped
                (1, samples) - confirm for other callers.

        Returns:
            The emotion-score dict produced by ``_map_features_to_emotions``.

        Raises:
            Exception: Any failure is logged and re-raised unchanged.
        """
        try:
            # Convert to mel spectrogram and compress its dynamic range.
            mel_spec = self.mel_transform(waveform)
            mel_spec = torch.log1p(mel_spec)

            # Calculate features. Summing over dim=1 collapses the mel-bin axis
            # (the same axis the arange of n_mels is broadcast along below).
            temporal_features = {
                "energy": float(torch.mean(torch.sum(mel_spec, dim=1))),
                "energy_std": float(torch.std(torch.sum(mel_spec, dim=1))),
                # Energy-weighted mean mel-bin index; 1e-8 avoids division by zero.
                "spectral_centroid": float(torch.mean(
                    torch.sum(mel_spec * torch.arange(self.n_mels, device=mel_spec.device).view(1, -1, 1), dim=1) /
                    (torch.sum(mel_spec, dim=1) + 1e-8)
                )),
                # Fraction of adjacent sample pairs whose product is negative,
                # i.e. where the signal changes sign.
                "zero_crossing_rate": float(torch.mean((waveform[..., :-1] * waveform[..., 1:] < 0).float()))
            }

            # Map features to emotions
            emotions = self._map_features_to_emotions(temporal_features)
            return emotions

        except Exception as e:
            self.logger.error(f"Error extracting features: {str(e)}")
            raise

    def _map_features_to_emotions(self, features: Dict[str, float]) -> Dict[str, float]:
        """Map audio features to emotion scores.

        Args:
            features: Dict with the keys "energy", "energy_std",
                "spectral_centroid" and "zero_crossing_rate".

        Returns:
            Dict with the keys "angry", "happy", "sad" and "neutral". Scores
            are divided by their total and rounded to 4 decimals, so they sum
            to approximately 1.

        Raises:
            Exception: Any failure (e.g. a missing key) is logged and re-raised.
        """
        try:
            # Normalize features. Each value is capped at 1.0; the energy scale
            # is a fixed constant (NOTE(review): 10.0 looks hand-picked - confirm).
            max_energy = 10.0
            max_spectral = float(self.n_mels)

            norm_features = {
                "energy": min(1.0, features["energy"] / max_energy),
                "energy_std": min(1.0, features["energy_std"] / max_energy),
                "spectral_centroid": min(1.0, features["spectral_centroid"] / max_spectral),
                "zero_crossing_rate": min(1.0, features["zero_crossing_rate"] * 2.0)
            }

            # Calculate emotion scores as fixed-weight linear combinations.
            emotions = {
                "angry": 0.4 * norm_features["energy"] + 0.3 * norm_features["zero_crossing_rate"] +
                        0.3 * norm_features["energy_std"],
                "happy": 0.4 * norm_features["energy"] + 0.4 * norm_features["spectral_centroid"] +
                        0.2 * (1 - norm_features["energy_std"]),
                "sad": 0.4 * (1 - norm_features["energy"]) + 0.3 * (1 - norm_features["spectral_centroid"]) +
                       0.3 * norm_features["energy_std"],
                "neutral": 0.4 * (1 - norm_features["energy_std"]) + 0.3 * (1 - abs(0.5 - norm_features["energy"])) +
                          0.3 * (1 - abs(0.5 - norm_features["spectral_centroid"]))
            }

            # Normalize to sum to 1 (1e-8 guards against a zero total)
            total = sum(emotions.values()) + 1e-8
            emotions = {k: round(v/total, 4) for k, v in emotions.items()}

            return emotions

        except Exception as e:
            self.logger.error(f"Error mapping emotions: {str(e)}")
            raise

    def _transcribe_audio(self, audio_path: str) -> Dict[str, Any]:
        """Transcribe audio using Whisper.

        Args:
            audio_path: Path handed directly to the Whisper model.

        Returns:
            Dict with "text" (the full transcript) and "segments", a list of
            dicts holding each segment's "text" plus its "start" and "end"
            rounded to 2 decimals.

        Raises:
            Exception: Any failure is logged and re-raised unchanged.
        """
        try:
            result = self.whisper_model.transcribe(audio_path)
            return {
                "text": result["text"],
                "segments": [
                    {
                        "text": segment["text"],
                        "start": round(segment["start"], 2),
                        "end": round(segment["end"], 2),
                    }
                    for segment in result["segments"]
                ]
            }
        except Exception as e:
            self.logger.error(f"Error in transcription: {str(e)}")
            raise

    @staticmethod
    def _parse_json_response(content: Optional[str]) -> Any:
        """Decode the JSON payload of a chat-completion reply.

        Strict parsing is attempted first, so any reply that is already valid
        JSON decodes exactly as ``json.loads`` would. If that fails, the text
        between the first "{" and the last "}" is parsed instead, which
        tolerates Markdown code fences and prose surrounding the object.

        Args:
            content: Raw message text; may be None or empty.

        Returns:
            The decoded JSON value.

        Raises:
            ValueError: If the reply is empty or contains no parseable JSON
                object (``json.JSONDecodeError`` is a ``ValueError`` subclass).
        """
        if content is None or not content.strip():
            raise ValueError("Model returned an empty response; expected a JSON object")

        text = content.strip()
        try:
            return json.loads(text)
        except json.JSONDecodeError:
            # Fall through and try to isolate the JSON object from extra text.
            pass

        start = text.find("{")
        end = text.rfind("}")
        if start == -1 or end <= start:
            raise ValueError(f"Model response contains no JSON object: {text[:200]!r}")
        return json.loads(text[start:end + 1])

    def _analyze_stress(self, emotion_scores: Dict[str, float],
                       transcription: Dict[str, Any]) -> Dict[str, Any]:
        """Get stress analysis from GPT-4 using new OpenAI API.

        Args:
            emotion_scores: Scores to include in the prompt.
            transcription: Dict whose "text" entry is included in the prompt.

        Returns:
            The model's reply decoded from JSON.

        Raises:
            Exception: API errors and unparseable replies are logged and
                re-raised.
        """
        try:
            prompt = {
                "emotion_scores": emotion_scores,
                "transcription": transcription["text"],
                "instructions": "Based on the emotional scores and transcribed speech, provide:"
                              "\n1. Stress level (1-7 scale)"
                              "\n2. Confidence score (0-1)"
                              "\n3. List of stress indicators"
                              "\n4. Recommendations for stress management"
                              "\nReturn the response in JSON format."
            }

            # Using new OpenAI API format
            response = self.client.chat.completions.create(
                model="gpt-4",
                messages=[
                    {"role": "system", "content": "You are an assistant designed to analyze the stress level in speech. Focus on identifying emotional intensity, such as regret, nostalgia, uneasiness, or conflict, and map it to a stress level from 1 to 7. Provide an explanation for the level Provide analysis in JSON format."},
                    {"role": "user", "content": json.dumps(prompt, indent=2)}
                ],
                temperature=0.7
            )

            # Extract content from new API response format. The reply is free
            # text, so it is decoded leniently rather than with a bare json.loads.
            result = response.choices[0].message.content
            return self._parse_json_response(result)

        except Exception as e:
            self.logger.error(f"Error in GPT analysis: {str(e)}")
            raise

    def analyze_stress(self, audio_path: str) -> Dict[str, Any]:
        """Perform complete stress analysis on audio file.

        Args:
            audio_path: Path of the audio file to analyze.

        Returns:
            On success, a dict with "metadata" (timestamp, audio_file,
            status "success") and "analysis" (emotion_scores, transcription,
            stress_assessment). On any failure, a dict with "metadata"
            (status "failed") and "error" holding the exception message.
            This method does not raise; failures are logged and reported in
            the returned dict.
        """
        try:
            # Load and preprocess audio: resample to the analyzer's rate and
            # average channels down to mono.
            waveform, sr = torchaudio.load(audio_path)
            if sr != self.sample_rate:
                resampler = T.Resample(sr, self.sample_rate)
                waveform = resampler(waveform)
            if waveform.shape[0] > 1:
                waveform = torch.mean(waveform, dim=0, keepdim=True)

            # Extract features
            emotion_scores = self._extract_audio_features(waveform)
            self.logger.info("Audio features extracted successfully")

            # Get transcription (Whisper reads the file itself from audio_path,
            # not the preprocessed waveform above)
            transcription = self._transcribe_audio(audio_path)
            self.logger.info("Transcription complete")

            # Get stress analysis
            stress_analysis = self._analyze_stress(emotion_scores, transcription)
            self.logger.info("Stress analysis complete")

            output = {
                "metadata": {
                    "timestamp": datetime.now().isoformat(),
                    "audio_file": audio_path,
                    "status": "success"
                },
                "analysis": {
                    "emotion_scores": emotion_scores,
                    "transcription": transcription,
                    "stress_assessment": stress_analysis
                }
            }

            self.logger.info("Analysis completed successfully")
            return output

        except Exception as e:
            self.logger.error(f"Error in stress analysis: {str(e)}")
            return {
                "metadata": {
                    "timestamp": datetime.now().isoformat(),
                    "audio_file": audio_path,
                    "status": "failed"
                },
                "error": str(e)
            }

# Recordings analyzed when main() is called without an explicit file list.
DEFAULT_AUDIO_FILES = (
    "fire.mp3",
    "sad.wav",
    "New Recording 20.wav",
    "New Recording 15.m4a",
    "test1.mp3",
    "New Recording 16.m4a",
    "New Recording 13.m4a",
    "New Recording 25.m4a",
    "1008_IWW_ANG_XX.wav",
)


def main(audio_files=None, output_path="stress_analysis_results.json"):
    """Analyze a batch of recordings, save every result and print a summary.

    Args:
        audio_files: Iterable of audio paths; defaults to DEFAULT_AUDIO_FILES.
        output_path: Destination JSON file. It receives a list with one
            result dict per analyzed file, in input order.

    The OpenAI API key is read from the OPENAI_API_KEY environment variable
    rather than being embedded in the notebook. Any error (including a
    missing key) is logged and printed; nothing is raised to the caller.
    """
    logging.basicConfig(
        level=logging.INFO,
        format='%(asctime)s - %(name)s - %(levelname)s - %(message)s'
    )
    logger = logging.getLogger(__name__)

    if audio_files is None:
        audio_files = DEFAULT_AUDIO_FILES

    try:
        # Never hard-code credentials: take the key from the environment.
        api_key = os.environ.get("OPENAI_API_KEY")
        if not api_key:
            raise RuntimeError(
                "OPENAI_API_KEY environment variable is not set"
            )

        analyzer = StressAnalyzer(openai_api_key=api_key)

        # Keep every result instead of overwriting a single variable;
        # analyze_stress reports per-file failures in the returned dict.
        all_results = [analyzer.analyze_stress(path) for path in audio_files]

        # Save results
        with open(output_path, "w", encoding="utf-8") as f:
            json.dump(all_results, f, indent=2, ensure_ascii=False)

        print(f"Analysis complete. Results saved to {output_path}")

        # Print key results for each recording
        for results in all_results:
            print(f"\n=== {results['metadata']['audio_file']} ===")
            if results["metadata"]["status"] == "success":
                analysis = results["analysis"]
                print("Key findings:")
                print("Emotion Scores:", json.dumps(analysis["emotion_scores"], separators=(',', ':')))
                print("Transcription:", analysis["transcription"]["text"])
                print("Stress Assessment:", json.dumps(analysis["stress_assessment"], indent=2))
            else:
                print(f"Analysis failed: {results.get('error', 'Unknown error')}")

    except Exception as e:
        logger.error("Analysis failed", exc_info=True)
        print(f"Error during analysis: {str(e)}")


if __name__ == "__main__":
    main()

  checkpoint = torch.load(fp, map_location=device)
ERROR:__main__:Error in stress analysis: Failed to open the input "fire.mp3" (No such file or directory).
Exception raised from get_input_format_context at /__w/audio/audio/pytorch/audio/src/libtorio/ffmpeg/stream_reader/stream_reader.cpp:42 (most recent call first):
frame #0: c10::Error::Error(c10::SourceLocation, std::string) + 0x96 (0x7bf14316c446 in /usr/local/lib/python3.11/dist-packages/torch/lib/libc10.so)
frame #1: c10::detail::torchCheckFail(char const*, char const*, unsigned int, std::string const&) + 0x64 (0x7bf1431166e4 in /usr/local/lib/python3.11/dist-packages/torch/lib/libc10.so)
frame #2: <unknown function> + 0x42134 (0x7bf1430a8134 in /usr/local/lib/python3.11/dist-packages/torio/lib/libtorio_ffmpeg4.so)
frame #3: torio::io::StreamingMediaDecoder::StreamingMediaDecoder(std::string const&, std::optional<std::string> const&, std::optional<std::map<std::string, std::string, std::less<std::string>, std::allocator<std::pair

Analysis complete. Results saved to stress_analysis_results.json

Key findings:
Emotion Scores: {"angry":0.1278,"happy":0.1823,"sad":0.3041,"neutral":0.3858}
Transcription:  loans that we can offer with this or farm ownership loans, operating lines of credit, or equipment and capital improvement need loans. The benefit to the
Stress Assessment: {
  "stress_level": 2,
  "confidence_score": 0.85,
  "stress_indicators": [
    "Neutral tone",
    "Minor sadness"
  ],
  "recommendations": [
    "Maintain composure and calmness",
    "Ensure clear and positive communication"
  ],
  "explanation": "The speaker's speech is largely neutral with a minor indication of sadness. However, the overall emotional intensity is low, suggesting a low stress level. Therefore, the stress level is evaluated as 2 out of 7."
}
