In [1]:
import openai
import pandas as pd
from tqdm import tqdm
from pulemet import Pulemet
import asyncio
import json

In [2]:
# Load IPython's autoreload extension and switch it to mode 2, so edits to
# imported modules are picked up without restarting the kernel.
%load_ext autoreload
%autoreload 2

In [3]:
# Patch asyncio via nest_asyncio before any async cell runs.
# NOTE(review): presumably needed because the notebook kernel already owns a
# running event loop and some later call has to re-enter it — confirm it is
# still required, since the cells below only use top-level `await`.
import nest_asyncio
nest_asyncio.apply()

In [4]:
# Async OpenAI-compatible client aimed at the llm7.io endpoint. The key is a
# placeholder string, not a secret.
# NOTE(review): presumably this endpoint does not validate the key — confirm.
client_options = {"base_url": "https://api.llm7.io/v1", "api_key": "unused"}
client = openai.AsyncOpenAI(**client_options)

# Request scheduler created with rps=1.5 and tqdm as its progress bar
# (the recorded runs below progress at about 1.50 it/s).
pulemet = Pulemet(pbar=tqdm, rps=1.5)

Total: 0it [00:00, ?it/s]
[A

In [5]:
# Read the evaluation set. Per the preview below, each row carries the true
# coordinates (`lat`, `lng`) and a free-text `description` of the city.
df = pd.read_csv("data/test_markup.csv")
df.head()

Unnamed: 0.1,Unnamed: 0,city,city_ascii,lat,lng,country,iso2,iso3,admin_name,capital,population,id,description
0,9027,Brentwood,Brentwood,37.9356,-121.719,United States,US,USA,California,,64609.0,1840018904,A suburban area known for its affluent lifesty...
1,15237,Guacarí,Guacari,3.7667,-76.3333,Colombia,CO,COL,Valle del Cauca,minor,35513.0,1170383272,"Nestled in the lush Colombian Andes, this smal..."
2,3961,Zhanlicun,Zhanlicun,23.2881,116.2594,China,CN,CHN,Guangdong,,161439.0,1156184947,"Located in southern China, this village is kno..."
3,7113,Arujá,Aruja,-23.3967,-46.3211,Brazil,BR,BRA,São Paulo,minor,83939.0,1076436872,"Situated in southeastern Brazil, this city is ..."
4,7937,Novotroitsk,Novotroitsk,51.2039,58.3114,Russia,RU,RUS,Orenburgskaya Oblast’,minor,74469.0,1643186193,"In the heart of Russia, this city is an import..."


In [6]:
async def test_llm(client, description, model = "gpt-4.1-nano-2025-04-14"):
    try:
        response = await client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": 'Try to guess coordinates of city in description. \
                    For each city return your response in the following JSON format: {"lat": "latitude answer", "log": "longitude answer"}' },
                {"role": "user", "content": f"Description: {description}"}
            ]
        )
        return response.choices[0].message.content
    except Exception as e:
        return f"Ошибка: {str(e)}"

In [7]:
tasks = pulemet.process([test_llm(client, i['description']) for i in df.iloc])
responses = await asyncio.gather(*tasks, return_exceptions=True)

Total: 100%|██████████| 1960/1960 [21:52<00:00,  1.50it/s]

In [10]:
# Parse every raw reply as JSON. Replies that cannot be parsed, or that lack
# the expected keys, are recorded by position in `failed` and printed so they
# can be inspected and retried. `answers` holds only the successful parses.
answers = []
failed = []
for i, ans in enumerate(responses):
    try:
        parsed = json.loads(ans)
        # Valid JSON is not enough: later cells index each answer by 'lat' and 'log'.
        if not isinstance(parsed, dict) or 'lat' not in parsed or 'log' not in parsed:
            raise ValueError("reply is not an object with 'lat' and 'log'")
    except (ValueError, TypeError):
        # ValueError covers json.JSONDecodeError (malformed text) and the check
        # above; TypeError covers non-string entries such as exception objects
        # or None. Unlike a bare `except:`, KeyboardInterrupt still propagates.
        failed.append(i)
        print(i, ans)
    else:
        answers.append(parsed)

404 {"lat": "-3.119," "log": "-52.125"}
1896 Ошибка: 'NoneType' object is not subscriptable


In [17]:
tasks = pulemet.process([test_llm(client, i['description']) for i in df.iloc[failed].iloc])
responses_fix = await asyncio.gather(*tasks, return_exceptions=True)

Total: 100%|█████████▉| 1961/1962 [27:31<01:42, 102.23s/it]

In [18]:
# Show the raw replies from the retry pass, in the same order as `failed`.
responses_fix

['{"lat": "-3.119," "log": "-52.125"}',
 '{"lat": "-20.6934", "log": "-52.4384"}']

In [23]:
# Hand-corrected raw replies, keyed by row position in `responses`:
#   404  - the model's reply had a misplaced comma/quote; repaired by hand.
#   1896 - the first request errored; value copied from the retry output.
manual_replies = {
    404: '{"lat": "-3.119", "log": "-52.125"}',
    1896: '{"lat": "-20.6934", "log": "-52.4384"}',
}

# Parse every reply, substituting the manual text where one exists.
answers_fix = [
    json.loads(manual_replies.get(position, raw))
    for position, raw in enumerate(responses)
]

# Attach the predicted coordinates (kept exactly as the model returned them).
df['ans_lat'] = [answer['lat'] for answer in answers_fix]
df['ans_log'] = [answer['log'] for answer in answers_fix]
df.head()

Unnamed: 0.1,Unnamed: 0,city,city_ascii,lat,lng,country,iso2,iso3,admin_name,capital,population,id,description,ans_lat,ans_log
0,9027,Brentwood,Brentwood,37.9356,-121.719,United States,US,USA,California,,64609.0,1840018904,A suburban area known for its affluent lifesty...,37.4456,-122.1607
1,15237,Guacarí,Guacari,3.7667,-76.3333,Colombia,CO,COL,Valle del Cauca,minor,35513.0,1170383272,"Nestled in the lush Colombian Andes, this smal...",4.7128,-75.0131
2,3961,Zhanlicun,Zhanlicun,23.2881,116.2594,China,CN,CHN,Guangdong,,161439.0,1156184947,"Located in southern China, this village is kno...",23.1167,113.246
3,7113,Arujá,Aruja,-23.3967,-46.3211,Brazil,BR,BRA,São Paulo,minor,83939.0,1076436872,"Situated in southeastern Brazil, this city is ...",-23.0094,-47.8824
4,7937,Novotroitsk,Novotroitsk,51.2039,58.3114,Russia,RU,RUS,Orenburgskaya Oblast’,minor,74469.0,1643186193,"In the heart of Russia, this city is an import...",56.85,60.61


In [22]:
# Save `df` to CSV without writing the index.
# NOTE(review): this cell's execution count ([22]) precedes the cell above
# ([23]) — rerun top to bottom so the saved file matches the final `df`.
output_path = 'data/gpt41nano_test.csv'
df.to_csv(output_path, index=False)