In [20]:
import csv
import json
from pprint import pprint

from openai_request import OpenAIRequestBase  # Ensure this is correctly imported

class JapaneseSentenceProcessor(OpenAIRequestBase):
    """Build and send a multilingual "story" request for one row of a CSV file.

    The ``Content`` column of the chosen row is embedded in a prompt asking for
    a Japanese story with English, Arabic, Chinese and Cantonese translations,
    each broken into part/pronunciation pairs and returned as JSON. The request
    itself is sent through ``send_request_with_retry``, which this class does
    not define (it is expected to come from ``OpenAIRequestBase``).
    """

    # Language code -> name of the pronunciation field requested for every
    # pair in that language. Insertion order is the order used in the prompt.
    _STORY_PHONETIC_KEYS = {
        "ja": "furigana",
        "en": "phon",
        "ar": "phon",
        "zh": "pinyin",
        "yue": "jyutping",
    }

    _STORY_SYSTEM_CONTENT = "You are a multilingual translator capable of understanding and rewriting sentences in Japanese, English, Arabic, Chinese, and Cantonese with phonetic pairs."

    def __init__(self, csv_file_path, *args, **kwargs):
        """Store the CSV path and initialise the base class.

        Args:
            csv_file_path: Path of the CSV file that rows are read from.
            *args: Positional arguments forwarded unchanged to the base class.
            **kwargs: Keyword arguments forwarded unchanged to the base class.
        """
        super().__init__(*args, **kwargs)
        self.csv_file_path = csv_file_path

    def process_csv(self, row_index=100):
        """Process a single data row of the CSV file.

        Args:
            row_index: Zero-based index of the data row to process; the header
                line consumed by ``csv.DictReader`` is not counted.
                Defaults to 100.

        Returns:
            The value returned by ``process_row`` for the selected row, or
            ``None`` when the file has no data row at ``row_index``.
        """
        selected_row = None
        with open(self.csv_file_path, newline='', encoding='utf-8') as csvfile:
            for idx, row in enumerate(csv.DictReader(csvfile)):
                if idx == row_index:
                    selected_row = row
                    break

        if selected_row is None:
            return None
        # The file is closed at this point, so it is not held open while the
        # request is in flight.
        return self.process_row(selected_row)

    def process_row(self, row):
        """Send the story request for one CSV row and return the response.

        Args:
            row: Mapping for one CSV row; its ``'Content'`` entry supplies the
                Japanese text placed in the prompt.

        Returns:
            Whatever ``send_request_with_retry`` returns for the prompt.

        Raises:
            KeyError: If ``row`` has no ``'Content'`` entry.
        """
        content = row['Content']
        print("content: ", content)

        prompt = self._build_story_prompt(content)
        response = self.send_request_with_retry(
            prompt, system_content=self._STORY_SYSTEM_CONTENT
        )
        pprint(response)  # Or handle the response as needed

        return response

    @classmethod
    def _build_story_prompt(cls, content):
        """Return the user prompt for ``content``, ending with the JSON template."""
        # One entry per language: the full sentence plus a list holding a
        # single example pair that shows the expected field names.
        template = {
            lang: {"full": "", "pairs": [{"part": "", phonetic_key: ""}]}
            for lang, phonetic_key in cls._STORY_PHONETIC_KEYS.items()
        }
        return (
            "Based on this Japanese text from Rosetta Stone: " + content + "\n\n"
            "To help me learn languages, "
            "could you combine it into a full Japanese story and translate it to English, Arabic, Chinese, and Cantonese with pronunciation? "
            "Break down the formulated sentence into a complete list of phonetic pairs for each character or word in each language. "
            "The output should be in JSON format: \n\n"
            "```json\n"
            + json.dumps(template, indent=2)
            + "\n```"
        )

# Script entry point. OpenAIRequestBase is imported from the openai_request
# module (see the import at the top of this cell).
if __name__ == "__main__":
    csv_file_path = 'japanese_language_data.csv'  # Update this path to your CSV file
    # use_cache is forwarded through **kwargs to the OpenAIRequestBase
    # constructor; its effect is defined there.
    processor = JapaneseSentenceProcessor(csv_file_path, use_cache=True)
    
    # process_csv() handles one row of the file and returns that row's response.
    response = processor.process_csv()


content:  青い 	シャツ\\nあおい	シャツ\\naoi	shatsu\\n赤い	シャツ\\nあかい	シャツ\\nakai	shatsu\\n緑	の	Tシャツ\\nみどり	の 	Tシャツ\\nmidori	no	Tshatsu\\n黄色	の	Tシャツ\\nきいろ	の 	Tシャツ\\nkiiro	no	Tshatsu21	 カップ 	を 	何個 	持って 	いますか。\\nカップ	を 	なんこ 	もって 	いますか。\\nkappu	o	nanko	motte	imasuka.\\n四個	持って 	います。\\nよんこ	もって 	います。\\nyonko	motte	imasu.\\nサンドイッチ 	を 	何個 	持って 	いますか。\\nサンドイッチ 	を 	なんこ 	もって 	いますか。\\nsandoicchi 	o	nanko	motte	imasuka.\\n五個	持って 	います。\\nごこ	もって 	います。\\ngoko	motte	imasu.


Traceback (most recent call last):
  File "/Users/lachlan/Documents/iProjects/rosetta_stone/openai_request.py", line 94, in parse_response
    return json.loads(json_string)
  File "/Users/lachlan/miniconda3/lib/python3.10/json/__init__.py", line 346, in loads
    return _default_decoder.decode(s)
  File "/Users/lachlan/miniconda3/lib/python3.10/json/decoder.py", line 337, in decode
    obj, end = self.raw_decode(s, idx=_w(s, 0).end())
  File "/Users/lachlan/miniconda3/lib/python3.10/json/decoder.py", line 353, in raw_decode
    obj, end = self.scan_once(s, idx)
json.decoder.JSONDecodeError: Expecting ',' delimiter: line 643 column 8 (char 10910)

During handling of the above exception, another exception occurred:

Traceback (most recent call last):
  File "/Users/lachlan/Documents/iProjects/rosetta_stone/openai_request.py", line 66, in send_request_with_retry
    parsed_response = self.parse_response(ai_response)
  File "/Users/lachlan/Documents/iProjects/rosetta_stone/openai_request.

{'ar': {'full': 'في يوم من الأيام، كان لدي قميص أزرق.',
        'pairs': [{'part': 'في', 'phon': 'fee'},
                  {'part': 'يوم', 'phon': 'yawm'},
                  {'part': 'من', 'phon': 'min'},
                  {'part': 'الأيام،', 'phon': 'al-ayyām'},
                  {'part': 'كان', 'phon': 'kān'},
                  {'part': 'لدي', 'phon': 'ladayya'},
                  {'part': 'قميص', 'phon': 'qamīṣ'},
                  {'part': 'أزرق', 'phon': "'azraq"},
                  {'part': '.', 'phon': ''}]},
 'en': {'full': 'One day, I had a blue shirt.',
        'pairs': [{'part': 'One', 'phon': 'wʌn'},
                  {'part': 'day', 'phon': 'deɪ'},
                  {'part': ',', 'phon': ''},
                  {'part': 'I', 'phon': 'aɪ'},
                  {'part': 'had', 'phon': 'hæd'},
                  {'part': 'a', 'phon': 'ə'},
                  {'part': 'blue', 'phon': 'bluː'},
                  {'part': 'shirt', 'phon': 'ʃɜːrt'},
                  {'part': '.', 'pho