In [2]:
# install pytorch
!pip install torch torchvision

Collecting torch
  Downloading torch-2.9.0-cp312-none-macosx_11_0_arm64.whl.metadata (30 kB)
Collecting torchvision
  Downloading torchvision-0.24.0-cp312-cp312-macosx_11_0_arm64.whl.metadata (5.9 kB)
Collecting filelock (from torch)
  Downloading filelock-3.20.0-py3-none-any.whl.metadata (2.1 kB)
Collecting sympy>=1.13.3 (from torch)
  Downloading sympy-1.14.0-py3-none-any.whl.metadata (12 kB)
Collecting networkx>=2.5.1 (from torch)
  Downloading networkx-3.5-py3-none-any.whl.metadata (6.3 kB)
Collecting fsspec>=0.8.5 (from torch)
  Downloading fsspec-2025.9.0-py3-none-any.whl.metadata (10 kB)
Collecting pillow!=8.3.*,>=5.3.0 (from torchvision)
  Downloading pillow-12.0.0-cp312-cp312-macosx_11_0_arm64.whl.metadata (8.8 kB)
Collecting mpmath<1.4,>=1.1.0 (from sympy>=1.13.3->torch)
  Downloading mpmath-1.3.0-py3-none-any.whl.metadata (8.6 kB)
Downloading torch-2.9.0-cp312-none-macosx_11_0_arm64.whl (74.5 MB)
[2K   [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m74.5/74.5 MB[0m 

In [3]:
# import and pre-process the dataset of indian cities

import pandas as pd

indian_cities_df = pd.read_csv("/Users/srinandangondi/Downloads/cities_r2.csv")

indian_cities_df.head()

Unnamed: 0,name_of_city,state_code,state_name,dist_code,population_total,population_male,population_female,0-6_population_total,0-6_population_male,0-6_population_female,...,literates_female,sex_ratio,child_sex_ratio,effective_literacy_rate_total,effective_literacy_rate_male,effective_literacy_rate_female,location,total_graduates,male_graduates,female_graduates
0,Abohar,3,PUNJAB,9,145238,76840,68398,15870,8587,7283,...,44972,890,848,79.86,85.49,73.59,"30.1452928,74.1993043",16287,8612,7675
1,Achalpur,27,MAHARASHTRA,7,112293,58256,54037,11810,6186,5624,...,43086,928,909,91.99,94.77,89.0,"21.257584,77.5086754",8863,5269,3594
2,Adilabad,28,ANDHRA PRADESH,1,117388,59232,58156,13103,6731,6372,...,37660,982,947,80.51,88.18,72.73,"19.0809075,79.560344",10565,6797,3768
3,Adityapur,20,JHARKHAND,24,173988,91495,82493,23042,12063,10979,...,54515,902,910,83.46,89.98,76.23,"22.7834741,86.1576889",19225,12189,7036
4,Adoni,28,ANDHRA PRADESH,21,166537,82743,83794,18406,9355,9051,...,45089,1013,968,68.38,76.58,60.33,"15.6322227,77.2728368",11902,7871,4031


In [4]:
len(indian_cities_df)

493

In [5]:
# remove in-line spaces, hypens, and periods
indian_cities_df['name_of_city'] = indian_cities_df['name_of_city'].str.replace('[ -.]', '', regex=True)

In [6]:
# get city name column, convert to lower-case, and remove trailing white spaces
indian_cities_names = indian_cities_df['name_of_city'].str.lower().str.rstrip().tolist()
indian_cities_names[:5]

['abohar', 'achalpur', 'adilabad', 'adityapur', 'adoni']

In [7]:
# print out bigrams of first city name
for name in indian_cities_names[:1]:
    for c1, c2 in zip(name, name[1:]):
        print(c1, c2)

a b
b o
o h
h a
a r


In [8]:
# get 26 letters of alphabet, add q to it as no word in our dataset uses it
chars = sorted(list(set(''.join(indian_cities_names))))
chars = sorted(chars + ['q'])


In [9]:
# create letter to index and index to letter look-up dicts
c_to_i = {}
i_to_c = {}

c_to_i['.'] = 0
i_to_c[0] = '.'

for i, c in enumerate(chars, start=1):
    c_to_i[c] = i
    i_to_c[i] = c
    

In [10]:
print(c_to_i)
print("\n")
print(i_to_c)

{'.': 0, 'a': 1, 'b': 2, 'c': 3, 'd': 4, 'e': 5, 'f': 6, 'g': 7, 'h': 8, 'i': 9, 'j': 10, 'k': 11, 'l': 12, 'm': 13, 'n': 14, 'o': 15, 'p': 16, 'q': 17, 'r': 18, 's': 19, 't': 20, 'u': 21, 'v': 22, 'w': 23, 'x': 24, 'y': 25, 'z': 26}


{0: '.', 1: 'a', 2: 'b', 3: 'c', 4: 'd', 5: 'e', 6: 'f', 7: 'g', 8: 'h', 9: 'i', 10: 'j', 11: 'k', 12: 'l', 13: 'm', 14: 'n', 15: 'o', 16: 'p', 17: 'q', 18: 'r', 19: 's', 20: 't', 21: 'u', 22: 'v', 23: 'w', 24: 'x', 25: 'y', 26: 'z'}


In [11]:
import torch
import torch.nn.functional as F

In [14]:
# create bigrams for entire dataset and convert them to corresponding indices
city_x = []
city_y = []

for name in indian_cities_names:
    name = ['.'] + list(name) + ['.']
    for c1, c2 in zip(name, name[1:]):
        city_x.append(c_to_i[c1])
        city_y.append(c_to_i[c2])        

city_x = torch.tensor(city_x)
city_y = torch.tensor(city_y)

In [15]:
city_x

tensor([ 0,  1,  2,  ..., 13,  1, 12])

In [16]:
city_y

tensor([ 1,  2, 15,  ...,  1, 12,  0])

In [17]:
# convert inputs to one hot vectors
one_hot_x = torch.nn.functional.one_hot(city_x, num_classes=27).float()

In [18]:
one_hot_x

tensor([[1., 0., 0.,  ..., 0., 0., 0.],
        [0., 1., 0.,  ..., 0., 0., 0.],
        [0., 0., 1.,  ..., 0., 0., 0.],
        ...,
        [0., 0., 0.,  ..., 0., 0., 0.],
        [0., 1., 0.,  ..., 0., 0., 0.],
        [0., 0., 0.,  ..., 0., 0., 0.]])

In [19]:
# 5x27 input
# we need 27 neurons each with 27 weights and a bias
# define generator for reproducibility

g = torch.Generator().manual_seed(365)

weights = torch.randn(27,27, generator=g, requires_grad=True)
biases = torch.randn(27, generator=g, requires_grad=True)

In [20]:
weights.shape

torch.Size([27, 27])

In [21]:
biases

tensor([ 1.1278, -1.9540,  1.8379,  0.3690, -0.6884,  0.4385, -1.1833,  1.0985,
        -0.8912,  1.3808, -0.7798,  1.3334,  0.3925,  0.3801, -0.3684,  0.2962,
         0.2295, -0.4460, -1.1028, -1.9261, -0.5234, -2.0361,  0.0042,  1.2582,
        -1.2097, -0.8044, -0.5702], requires_grad=True)

In [24]:
# gradient descent
# experiment with # of epochs, learning rate, and regularization constant to get the lowest loss
for k in range(500):
  
  # forward pass
  # (5x27) * (27x27) + (27,)
  
  logits = one_hot_x @ weights + biases
  logits_exp = logits.exp() 
  probs = logits_exp / logits_exp.sum(dim=1, keepdims=True)
  
  #use L2 regularization with the cross entropy loss function   
  loss = -probs[torch.arange(city_x.nelement()), city_y].log().mean() + 0.001*(weights**2).mean()

  
  # backward pass
  weights.grad = None
  biases.grad = None
  loss.backward()
  

  weights.data -= 5 * weights.grad
  biases.data -= 5 * biases.grad

print(loss.item())    

2.313628911972046


In [25]:
loss.item()

2.313628911972046

In [28]:
# sample 5 words from the model
for _ in range(5):
  
  final = []
  i = 0
  
  while True:
    
    one_hot_x = F.one_hot(torch.tensor([i]), num_classes=27).float()
    
    logits = one_hot_x @ weights 
    logits_exp = logits.exp() 
    probs = logits_exp / logits_exp.sum(1, keepdims=True)

    #sample indices of next characters based on probs produced by model     
    i = torch.multinomial(probs, num_samples=1, replacement=True, generator=g).item()
    final.append(i_to_c[i])
    
    #end generating new characters if we encounter '.'     
    if i == 0:
      break
  
  print(''.join(final))

jmbeswarr.
sul.
puwasfafmyqarhfqheondzj.
mbipelysurculopqembalvucgrgvenpalyqyjyfvapurelikgubeshistqumjuqzizumapur.
chzjz.
