In [4]:
import html
import os
import re

# Define paths to the transcripts folder and output folder
transcripts_folder = "transcripts"
output_folder = "cleaned_transcripts"

# Ensure the output folder exists
os.makedirs(output_folder, exist_ok=True)

def clean_transcript(filename):
    """Extract and clean text from an SRV1 transcript."""
    try:
        # Open and read the transcript file
        with open(os.path.join(transcripts_folder, filename), "r", encoding="utf-8") as f:
            content = f.read()

        # Extract text between <text> and </text> tags using regex
        texts = re.findall(r"<text[^>]*>(.*?)</text>", content, re.DOTALL)

        if not texts:
            print(f"No content found in: {filename}")
            return

        # Join the extracted texts into a single string with newlines
        cleaned_content = "\n".join(texts)

        # Create the output filename
        cleaned_filename = os.path.join(output_folder, f"{os.path.splitext(filename)[0]}.txt")

        # Write the cleaned content to the new file
        with open(cleaned_filename, "w", encoding="utf-8") as f:
            f.write(cleaned_content)

        print(f"Processed: {filename} -> {cleaned_filename}")

    except Exception as e:
        print(f"Error processing {filename}: {e}")

# List all files in the transcripts folder
files = os.listdir(transcripts_folder)

if not files:
    print("No files found in the transcripts folder.")
else:
    for filename in files:
        # Only process SRV1 files
        if filename.endswith(".srv1"):
            clean_transcript(filename)
        else:
            print(f"Skipping non-SRV1 file: {filename}")

print("Processing complete.")


Processed: Apple Vision Pro is Missing Something....en.srv1 -> cleaned_transcripts/Apple Vision Pro is Missing Something....en.txt
Processed: Tesla Cybertruck Review： Already Iconic？.en.srv1 -> cleaned_transcripts/Tesla Cybertruck Review： Already Iconic？.en.txt
Processed: Google Pixel 8A Impressions： Just Get The 8!.en.srv1 -> cleaned_transcripts/Google Pixel 8A Impressions： Just Get The 8!.en.txt
Processed: OnePlus Open is Awesome - What's Happening with Folding Phones？!.en.srv1 -> cleaned_transcripts/OnePlus Open is Awesome - What's Happening with Folding Phones？!.en.txt
Processed: What is Happening with iPhone Camera？.en.srv1 -> cleaned_transcripts/What is Happening with iPhone Camera？.en.txt
Processed: Samsung Galaxy S24 Ultra Review： Why Buy Anything Else？.en.srv1 -> cleaned_transcripts/Samsung Galaxy S24 Ultra Review： Why Buy Anything Else？.en.txt
Processed: iPhone 16⧸16 Pro Review： Times Have Changed!.en.srv1 -> cleaned_transcripts/iPhone 16⧸16 Pro Review： Times Have Changed!.en.txt

In [None]:
from openai import OpenAI

client = OpenAI()
import os
from dotenv import load_dotenv

load_dotenv()

client.api_key = os.getenv("OPENAI_API_KEY") 

In [19]:
import os

# Define the path to the cleaned transcripts folder
cleaned_transcripts_folder = "cleaned_transcripts"
enhanced_transcripts_folder = "enhanced_transcripts"

# Ensure the output folder for enhanced transcripts exists
os.makedirs(enhanced_transcripts_folder, exist_ok=True)

def clean(dirty_text):
    """Use OpenAI GPT API to clean and enhance the transcript."""
    completion = client.chat.completions.create(
        model="gpt-4o-mini",  # Assuming this is the correct model in your environment
        messages=[
            {"role": "system", "content": 
             """Your task is to clean and enhance transcripts while preserving their original meaning and structure. Remove any strange symbols, redundant whitespace, or formatting issues. Ensure the text flows naturally and remains as close as possible to the original wording, but with improved readability. Do not change the meaning or tone of the transcript."""},
            {"role": "user", "content": dirty_text}
        ]
    )
    print(completion.choices[0].message.content)  # Print the cleaned output
    return completion.choices[0].message.content

def enhance_transcript(filename):
    """Read a cleaned transcript, enhance it using the clean function, and save it."""
    try:
        # Open and read the cleaned transcript file
        with open(os.path.join(cleaned_transcripts_folder, filename), "r", encoding="utf-8") as f:
            raw_text = f.read()

        # Use the clean function to enhance the text
        enhanced_content = clean(raw_text)

        # Save the enhanced content to a new .txt file
        enhanced_filename = os.path.join(enhanced_transcripts_folder, filename)
        with open(enhanced_filename, "w", encoding="utf-8") as f:
            f.write(enhanced_content)

        print(f"Enhanced: {filename} -> {enhanced_filename}")

    except Exception as e:
        print(f"Error enhancing {filename}: {e}")

# List all files in the cleaned transcripts folder
files = os.listdir(cleaned_transcripts_folder)

if not files:
    print("No files found in the cleaned transcripts folder.")
else:
    for filename in files:
        # Only process .txt files
        if filename.endswith(".txt"):
            enhance_transcript(filename)
        else:
            print(f"Skipping non-txt file: {filename}")

print("Enhancement complete.")


All right, so we wanted to answer the question once and for all: What is the best smartphone camera out right now? Maybe you think it’s the iPhone, or maybe it's a Samsung flagship, or perhaps it’s one of those Pixels. 

In years past, we’ve conducted blind voting using social media polls, which was really fun and yielded some surprising results. However, the results still depended on the match-ups I set up at the very beginning. I specifically placed the iPhone and the Pixel on opposite sides of the bracket, expecting the best ones to hopefully meet in the finals, but they never did. Plus, there was always a bit of human input involved; it’s possible I could have accidentally placed the second best phone against the best phone in the first round and had it eliminated.

So this time, we’re going to solve that once and for all. Welcome to the "Blind Smartphone Camera Test: Scientific Edition." 

Here’s the idea: We took 16 smartphones that came out this year. These are the flagship mode