In [1]:
# # MOUNTING GOOGLE DRIVE
# from google.colab import drive
# drive.mount('/content/drive')

# import os
# print(os.getcwd())

# wd = '/content/drive/MyDrive/CS 685/cs685_project/notebooks'
# print(os.listdir(wd))
# os.chdir(wd)
# print(os.getcwd())

## Train model

In [7]:
import os, errno
import sys
import json
import random
import numpy as np
from tqdm import trange

import torch
import torch.nn as nn
from torch import optim
import torch.nn.functional as F

from torch.utils.tensorboard import SummaryWriter
# from utils import save_checkpoint, load_checkpoint

# from torch.utils.data import DataLoader
# from load_dataset import Text2SQLDataset

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
device

device(type='cpu')

## Read data

In [3]:

# local
# target_folder = "/Users/aishwarya/Downloads/spring23/cs685-NLP/project/LSTM_encoder_decoder/data/data_final_processed_v2"
# target_folder = "/Users/aishwarya/Downloads/spring23/cs685-NLP/project/data/baseline/training_data"

# for colab
target_folder = "../data/baseline/training_data"

## GET DATA
#sample data for checking network
fol1 = 'train'
data_t = 'encode'
X_train_np = np.load(os.path.join(target_folder, fol1, f"{fol1}_{data_t}.npy"))
X_train_np = X_train_np[:5]
X_train_np = X_train_np.transpose(1,0)
# train_input = np.expand_dims(train_input, axis=-1) 
print(f'Train encoder input - {X_train_np.shape}')


#sample data for checking network
fol1 = 'train'
data_t = 'decode'
Y_train_np = np.load(os.path.join(target_folder, fol1, f"{fol1}_{data_t}.npy"))
Y_train_np = Y_train_np[:5]
Y_train_np = Y_train_np.transpose(1,0)
# train_output = np.expand_dims(train_output, axis=-1) 
print(f'Train decoder input - {Y_train_np.shape}')

with open(os.path.join(target_folder, 'data_info.json'), 'r') as fp:
    data_info = json.load(fp)
    
pad_idx = data_info['pad_idx']
sos_idx = data_info['sos_idx']
vocab_size = data_info['vocab_size']
    
data_info

Train encoder input - (43, 5)
Train decoder input - (129, 5)


{'vocab_size': 5938,
 'max_encoder_len': 43,
 'max_decoder_len': 127,
 'pad_idx': 1462,
 'sos_idx': 1463,
 'eos_idx': 1461}

In [4]:
# convert numpy array to tensors
X_train = torch.from_numpy(X_train_np).type(torch.int64) #torch.int64, torch.Tensor
Y_train = torch.from_numpy(Y_train_np).type(torch.int64)

X_train.shape, Y_train.shape

(torch.Size([43, 5]), torch.Size([129, 5]))

## Build Network

In [8]:
# FOR PRINTING INTERMEDIATE TORCH SIZES
DEBUG_FLAG = False

In [9]:
class bilstm_encoder(nn.Module):
    ''' Encodes time-series sequence '''

    def __init__(self, input_size, hidden_size, emb_size, num_layers = 1, dropout = 0):
        
        '''
        : param input_size:     the number of features in the input X, eg: word embeddings
        : param hidden_size:    the number of features in the hidden state h
        : param num_layers:     number of recurrent layers (i.e., 2 means there are
        :                       2 stacked LSTMs)
        '''
        
        super(bilstm_encoder, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        
        if DEBUG_FLAG:
            print(f"Encoder: input_size {input_size} - hidden_size {hidden_size} - emb_size {emb_size}")

        # define embeddings
        self.embeddings = nn.Embedding(input_size, emb_size)

        # define LSTM layer
        self.lstm = nn.LSTM(input_size = emb_size,
                            hidden_size = hidden_size,
                            num_layers = num_layers,
                            bidirectional = True,
                            dropout = dropout)

    def forward(self, x_input):
        
        '''
        : param x_input:               input of shape (seq_len, # in batch) #, input_size)
        : return lstm_out, hidden:     lstm_out gives all the hidden states in the sequence;
        :                              hidden gives the hidden state and cell state for the last
        :                              element in the sequence 
        '''
        
        
        embedded = self.embeddings(x_input)
        # embedded size: (seq_len, batch_size, embedding_size)
        if DEBUG_FLAG:
            print(f"Encoder embedded size - {type(embedded)} - {embedded.shape}")
#         embedded = embedded.view(1, 1, -1)
#         print(f"Encoder embedded size - {type(embedded)} - {embedded.shape}")
        
        lstm_out, self.hidden = self.lstm(embedded)
        if DEBUG_FLAG:
            print(f"Encoder hidden_state size - {type(self.hidden)} - {self.hidden[0].shape}")
        # lstm_out, self.hidden = self.lstm(x_input.view(x_input.shape[0], x_input.shape[1], self.input_size))
        
        return lstm_out, self.hidden     
    
    def init_hidden(self, batch_size):
        
        '''
        initialize hidden state
        : param batch_size:    x_input.shape[1]
        : return:              zeroed hidden state and cell state 
        '''
        
        return (torch.zeros(self.num_layers, batch_size, self.hidden_size),
                torch.zeros(self.num_layers, batch_size, self.hidden_size))

In [7]:
enc = bilstm_encoder(vocab_size, 30, 20)
out, enc_hidden_state = enc.forward(X_train)
out.shape, enc_hidden_state[0].shape, enc_hidden_state[1].shape

(torch.Size([43, 5, 60]), torch.Size([2, 5, 30]), torch.Size([2, 5, 30]))

In [8]:
# No bi - (torch.Size([43, 5, 30]), torch.Size([1, 5, 30]), torch.Size([1, 5, 30]))
# With bi - (torch.Size([43, 5, 60]), torch.Size([2, 5, 30]), torch.Size([2, 5, 30]))

In [10]:
class bilstm_decoder(nn.Module):
    ''' Decodes hidden state output by encoder '''
    
    def __init__(self, input_size, hidden_size, emb_size, output_size, num_layers = 2, dropout = 0):

        '''
        : param input_size:     the number of features in the input X
        : param hidden_size:    the number of features in the hidden state h
        : param num_layers:     number of recurrent layers (i.e., 2 means there are
        :                       2 stacked LSTMs)
        '''
        
        super(bilstm_decoder, self).__init__()
        self.input_size = input_size
        self.hidden_size = hidden_size
        self.num_layers = num_layers
        
        if DEBUG_FLAG:
            print(f"Encoder: input_size {input_size} - hidden_size {hidden_size} - emb_size {emb_size} - output_size {output_size}")
        
        # define embeddings
        self.embeddings = nn.Embedding(input_size, emb_size)
        
        self.lstm = nn.LSTM(input_size = emb_size,
                            hidden_size = hidden_size,
                            num_layers = num_layers,
                            bidirectional = False,
                            dropout = dropout)
        # bi is true
        # self.linear = nn.Linear(2*hidden_size, output_size)
        # num_layers = 1
        self.linear = nn.Linear(hidden_size, output_size)

    def forward(self, x_input, encoder_hidden_states):
        
        '''        
        : param x_input:                    should be 2D (1, batch_size) #, input_size)
        : param encoder_hidden_states:      hidden states
        : return output, hidden:            output gives all the hidden states in the sequence;
        :                                   hidden gives the hidden state and cell state for the last
        :                                   element in the sequence 
 
        '''
        if DEBUG_FLAG:
            print(f"Decoder x_input size - {x_input.shape}")
        x_input = x_input.unsqueeze(0)
        # x_input size: (1, batch_size)
        if DEBUG_FLAG:
            print(f"Decoder x_input size - {x_input.shape}")
        
        embedded = self.embeddings(x_input)
        # embedded size: (1, batch_size, embedding_size)
        if DEBUG_FLAG:
            print(f"Decoder embedded size - {embedded.shape}")
            print(f"Decoder encoder_hidden_states size - {encoder_hidden_states[0].shape}")
            
        lstm_out, self.hidden = self.lstm(embedded, encoder_hidden_states)
        # lstm_out size: (1, batch_size, hidden_size)
        if DEBUG_FLAG:
            print(f"Decoder lstm_out size - {lstm_out.shape}")
            print(f"Decoder hidden size - {self.hidden[0].shape}")
        
        lstm_out = lstm_out.squeeze(0)
        if DEBUG_FLAG:
            print(f"Decoder lstm_out size - {lstm_out.shape}")
        output = self.linear(lstm_out) 
        if DEBUG_FLAG:
            print(f"Decoder output size - {output.shape}")
        # output size: (1, batch_size, vocab_size)
        
#         output = output.squeeze(0)
#         print(f"Decoder output size - {output.shape}")
        # output size: (batch_size, vocab_size)
        
        return output, self.hidden

In [10]:
dec = bilstm_decoder(vocab_size, 30, 20, vocab_size)
out, hidden_state = dec.forward(Y_train[0], enc_hidden_state)
out.shape, hidden_state[0].shape, hidden_state[1].shape

(torch.Size([5, 5938]), torch.Size([2, 5, 30]), torch.Size([2, 5, 30]))

In [11]:
class lstm_seq2seq(nn.Module):
    ''' train LSTM encoder-decoder and make predictions '''
    
    def __init__(self, encoder, decoder):

        '''
        : param input_size:     the number of expected features in the input X
        : param hidden_size:    the number of features in the hidden state h
        '''

        super(lstm_seq2seq, self).__init__()

        self.encoder = encoder
        self.decoder = decoder


    def forward(self, source, target, target_vocab_size, teacher_force_ratio = 0.5):
        batch_size = source.shape[1]
        target_len = target.shape[0]
        
        outputs = torch.zeros(target_len, batch_size, target_vocab_size).to(device)
        
        # encoder outputs
        encoder_output, encoder_hidden = self.encoder.forward(source)
        
        # Grab start token
        x = target[0]
#         print(f"seq2seq x size - {x.shape}")
        
        for t in range(1, target_len):
            # decoder outputs
            decoder_output, decoder_hidden = self.decoder(x, encoder_hidden)
            
            outputs[t] = decoder_output
            # output size: (N, vocab_size)
            
            if DEBUG_FLAG: print(f"seq2seq decoder_output size - {decoder_output.shape}")
            best_guess = decoder_output.argmax(1)
            
            if DEBUG_FLAG: print(f"seq2seq best_guess size - {best_guess.shape} - {best_guess}")
            
            if DEBUG_FLAG: print(f"seq2seq target size - {target[t].shape} - {target[t]}")
            
            x = target[t] if random.random() < teacher_force_ratio else best_guess
            
        return outputs
    
    def predict(self, source, target_len, target_vocab_size, sos_idx):
        
        target_len = target_len+2
        batch_size = source.shape[1]

#         outputs = torch.zeros(target_len, batch_size, target_vocab_size).to(device)
        outputs = torch.zeros(target_len, batch_size).to(device)

        # encoder outputs
        encoder_output, encoder_hidden = self.encoder.forward(source)

        # Grab start token
        x = torch.from_numpy(np.array([sos_idx]*batch_size))
        
        for t in range(1, target_len):
            
            # decoder outputs
            decoder_output, decoder_hidden = self.decoder(x, encoder_hidden)

            # outputs[t] = decoder_output
            # output size: (N, vocab_size)

            best_guess = decoder_output.argmax(1)
            x = best_guess
            outputs[t] = best_guess

        return outputs

In [12]:
def eval_ckpt(dev_input_tensor, dev_target_tensor, model, criterion, epoch, batch_size, output_size):
    
    # calculate number of batch iterations
    n_batches = int(dev_input_tensor.shape[1] / batch_size)
#     print(f"Number of batches - {n_batches}")
    
    batch_loss = 0
    batch_count = 0
    
    model2.eval()
    with trange(n_batches) as tr:
        for b in tr:
          # select data 
          inp_data = dev_input_tensor[:, b*batch_size : (b+1)*batch_size] #, :]
          target = dev_target_tensor[:, b*batch_size : (b+1)*batch_size] #, :]

          if torch.cuda.is_available():
              inp_data, target = inp_data.cuda(), target.cuda()

          output = model.forward(inp_data, target, output_size)

          output = output[1:].reshape(-1, output.shape[2])
          target = target[1:].reshape(-1)

          if torch.cuda.is_available():
              output = output.cuda()

          # compute the loss
          loss = criterion(output, target)
          batch_loss += loss.item()

          batch_count += 1
                
    # loss for epoch 
    batch_loss /= n_batches
            
    return batch_loss

In [18]:
def train(input_tensor, target_tensor, dev_input_tensor, dev_target_tensor,
          emb_size, hidden_size, vocab_size,
          load_model = False, num_epochs = 2, lr = 0.0005, batch_size = 5):
  
    sub_folder_name = f"baseline_lr{lr}_bs{batch_size}_es{emb_size}_hs{hidden_size}"
    models_directory = f"models/{sub_folder_name}"

    if not os.path.isdir(models_directory):
      os.makedirs(models_directory)
    
    input_size_encoder = vocab_size             # german
    input_size_decoder = vocab_size             # english
    output_size = vocab_size                    # english

    encoder_embedding_size = emb_size
    decoder_embedding_size = emb_size

    # TENSORBOARD
    writer = SummaryWriter(f'tb/loss_plot/{sub_folder_name}')
    step = 0

    encoder_net = bilstm_encoder(input_size_encoder, hidden_size, emb_size).to(device)
    decoder_net = bilstm_decoder(input_size_decoder, hidden_size, emb_size, output_size).to(device)
    
    model = lstm_seq2seq(encoder_net, decoder_net).to(device)
    
    optimizer = optim.Adam(model.parameters(), lr=lr)
    criterion = nn.CrossEntropyLoss(ignore_index=pad_idx)
    
#     if load_model: load_checkpoint(torch.load('my_checkpoint.pth.ptar'), model, optimizer)
        
    # calculate number of batch iterations
    n_batches = int(input_tensor.shape[1] / batch_size)
    print(f"Number of batches - {n_batches}")
    
    # initialize array of losses 
    losses = np.full(num_epochs, np.nan)
    losses_v2 = {'train': {}, "val": {}}

    for epoch in range(1, num_epochs+1):
        

    #         checkpoint = {'state_dict': model.state_dict(),
    #                       'optimizer': optimizer.state_dict()}
    #         save_checkpoint(checkpoint)

        batch_loss = 0
        batch_count = 0

        with trange(n_batches) as tr:
            for b in tr:
        
              # select data 
              inp_data = input_tensor[:, b*batch_size : (b+1)*batch_size] #, :]
              target = target_tensor[:, b*batch_size : (b+1)*batch_size] #, :]

              if torch.cuda.is_available():
                  inp_data, target = inp_data.cuda(), target.cuda()
              
  #             if step < 1:
  #                 print(f"batch_size - {b*batch_size} - {(b+1)*batch_size}")
  #                 print(f"inp_data 0 - {inp_data}")

  #                 if step < 2:
  #                     print(f"inp_data 0 - {inp_data.shape}")
  #                     print(f"target 0 - {target.shape}")

              output = model.forward(inp_data, target, output_size)
              # output shape: (target_len, batch_size, output_dim)

  #             if step < 1:
  #                 print(f"output size before reshape - {output.shape}")
  #                 print(f"target size before reshape - {target.shape}")

              output = output[1:].reshape(-1, output.shape[2])
              target = target[1:].reshape(-1)

              if torch.cuda.is_available():
                  output = output.cuda()

  #             if step < 1:
  #                 print(f"output size after reshape - {output.shape}")
  #                 print(f"target size after reshape - {target.shape}")

  #                 output = output.argmax(2)
  #                 if step < 2:
  #                     print(f"output 3 - {output.shape} - {type(output)} - {output[:5]}")
  #                     print(f"target 3 - {target.shape} - {type(target)} - {target[:5]}")

              # zero the gradient
              optimizer.zero_grad()
            
            
              print(f"output - {output.size()}")
              print(f"target - {target.size()}")
              # compute the loss
              loss = criterion(output, target)
              batch_loss += loss.item()

              # backpropagation
              loss.backward()
              torch.nn.utils.clip_grad_norm_(model.parameters(), max_norm=1) # for healthy gradients
              optimizer.step()

              writer.add_scalar('Training loss', loss, global_step=step)
              step += 1

              batch_count += 1
              acc_batch_loss = batch_loss/batch_count
            
              # progress bar 
              tr.set_postfix({"epoch_num":epoch,
                              "loss":f"{acc_batch_loss:.3f}"})
        
        # loss for epoch 
        batch_loss /= n_batches 
        losses[epoch-1] = batch_loss
        losses_v2['train'][epoch] = batch_loss
        
        val_loss = eval_ckpt(dev_input_tensor, dev_target_tensor, model, criterion,
                            epoch, batch_size, output_size)
        losses_v2['val'][epoch] = val_loss
        print(f"val loss {val_loss}")
        
        break
        
        with open(os.path.join(models_directory, "loss.json"), 'w') as f:
          json.dump(losses_v2, f)
            
        # save models
        if (epoch > 4 and epoch % 2 == 0):
            torch.save(model, os.path.join(models_directory, f"model-{epoch}"))
    
#     break
#     torch.save(model, os.path.join(models_directory, f"model_last_{epoch}"))
            
    return losses, model

In [19]:
loss, model = train(X_train, Y_train, X_val, Y_val, emb_size=20, hidden_size=10, vocab_size=vocab_size,
            num_epochs = 2, lr = 0.001, batch_size = 2)

Number of batches - 2


  0%|                                                                                                                                                                                   | 0/2 [00:00<?, ?it/s]

output - torch.Size([256, 5938])
target - torch.Size([256])


100%|██████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 2/2 [00:00<00:00,  5.08it/s, epoch_num=1, loss=8.733]

output - torch.Size([256, 5938])
target - torch.Size([256])





NameError: name 'model2' is not defined

# Predict

In [3]:
import os
import torch

os.getcwd()

'/Users/aishwarya/Downloads/spring23/cs685-NLP/project/notebooks'

## Import data

In [19]:

# local
# target_folder = "/Users/aishwarya/Downloads/spring23/cs685-NLP/project/LSTM_encoder_decoder/data/data_final_processed_v2"
# target_folder = "/Users/aishwarya/Downloads/spring23/cs685-NLP/project/data/baseline/training_data"

# for colab
target_folder = "../data/baseline/training_data"

fol1 = 'test'

## GET DATA
#sample data for checking network
data_t = 'encode'
X_val_np = np.load(os.path.join(target_folder, fol1, f"{fol1}_{data_t}.npy"))
# X_val_np = X_val_np[:5]
X_val_np = X_val_np.transpose(1,0)
# train_input = np.expand_dims(train_input, axis=-1) 
print(f'Train encoder input - {X_val_np.shape}')


#sample data for checking network
data_t = 'decode'
Y_val_np = np.load(os.path.join(target_folder, fol1, f"{fol1}_{data_t}.npy"))
# Y_val_np = Y_val_np[:5]
Y_val_np = Y_val_np.transpose(1,0)
# train_output = np.expand_dims(train_output, axis=-1) 
print(f'Train decoder input - {Y_val_np.shape}')

with open(os.path.join(target_folder, 'data_info.json'), 'r') as fp:
    data_info = json.load(fp)
    
pad_idx = data_info['pad_idx']
sos_idx = data_info['sos_idx']
eos_idx = data_info['eos_idx']
vocab_size = data_info['vocab_size']
max_decoder_len = data_info["max_decoder_len"]
    
print(data_info)

# convert numpy array to tensors
X_val = torch.from_numpy(X_val_np).type(torch.int64) #torch.int64, torch.Tensor
Y_val = torch.from_numpy(Y_val_np).type(torch.int64)

X_val.shape, Y_val.shape


Train encoder input - (43, 696)
Train decoder input - (129, 696)
{'vocab_size': 5938, 'max_encoder_len': 43, 'max_decoder_len': 127, 'pad_idx': 1462, 'sos_idx': 1463, 'eos_idx': 1461}


(torch.Size([43, 696]), torch.Size([129, 696]))

In [20]:
with open(os.path.join(target_folder, 'idx_to_vocab.json'), 'r') as fp:
    idx_to_vocab = json.load(fp)

## Import model

In [21]:
# Model class must be defined somewhere
# model_path = os.path.join(os.getcwd(), "models/baseline_lr0.001_bs64_es200_hs100_v2/model_last_100")
model_path = os.path.join(os.getcwd(), "models/baseline_lr0.0005_bs64_es300_hs200/model_98")

model2 = torch.load(model_path, map_location=torch.device('cpu'))
model2.eval()

lstm_seq2seq(
  (encoder): bilstm_encoder(
    (embeddings): Embedding(5938, 300)
    (lstm): LSTM(300, 200, bidirectional=True)
  )
  (decoder): bilstm_decoder(
    (embeddings): Embedding(5938, 300)
    (lstm): LSTM(300, 200, num_layers=2)
    (linear): Linear(in_features=200, out_features=5938, bias=True)
  )
)

In [22]:
outputs = model2.predict(X_val, max_decoder_len, vocab_size, sos_idx)

In [23]:
X_val.shape, Y_val.shape, outputs.shape

(torch.Size([43, 696]), torch.Size([129, 696]), torch.Size([129, 696]))

In [24]:
output_mat = []
for c in range(outputs.shape[1]):
    
    check = outputs[:,c].numpy()
    col = []
    for x, value in np.ndenumerate(check):

        value_2 = int(value)
        
        # skip is sos
        if x[0] == 0:
            continue
        
        # exit loop if eos
        if value_2 == eos_idx:
            break
        
        text = idx_to_vocab[str(value_2)]
        col.append(text)
   
    output_mat.append(col)

len(output_mat), output_mat[0]

(696, ['select', 'count', '(', '*', ')', 'from', 'event'])

In [25]:
gt_mat = []
for c in range(Y_val.shape[1]):
    
    check = Y_val[:,c].numpy()
    col = []
    for x, value in np.ndenumerate(check):

        value_2 = int(value)
        
        # skip is sos
        if x[0] == 0:
            continue
        
        # exit loop if eos
        if value_2 == eos_idx:
            break
        
        text = idx_to_vocab[str(value_2)]
        col.append(text)
   
    gt_mat.append(col)

len(gt_mat), gt_mat[0]

(696,
 ['select',
  'count',
  '(',
  '*',
  ')',
  ',',
  'competition',
  'from',
  'match',
  'group',
  'by',
  'competition'])

In [26]:
idy = 200
gt_mat[idy], output_mat[idy]

(['select',
  't2',
  '.',
  'name',
  'from',
  'assignedto',
  'as',
  't1',
  'join',
  'scientists',
  'as',
  't2',
  'on',
  't1',
  '.',
  'scientist',
  '=',
  't2',
  '.',
  'ssn'],
 ['select',
  't1',
  '.',
  't1',
  '.',
  't1',
  '.',
  't1',
  '.',
  't1',
  '.',
  't1',
  '.',
  't1',
  '.',
  't1',
  '.',
  't1',
  '.',
  't1',
  '.',
  't1',
  '.',
  't1',
  '.',
  't1',
  '.',
  't1',
  '.',
  't1',
  '.',
  't1',
  '.',
  't1',
  '.',
  't1',
  '.',
  't1',
  '.',
  't1',
  '.',
  't1',
  '.',
  't1',
  '.',
  't1',
  '.',
  't1',
  '.',
  't1',
  '.',
  't1',
  '.',
  't1',
  '.',
  't1',
  '.',
  't1',
  '.',
  't1',
  '.',
  't1',
  '.',
  't1',
  '.',
  't1',
  '.',
  't1',
  '.',
  't1',
  '.',
  't1',
  '.',
  't1',
  '.',
  't1',
  '.',
  't1',
  '.',
  't1',
  '.',
  't1',
  '.',
  't1',
  '.',
  't1',
  '.',
  't1',
  '.',
  't1',
  '.',
  't1',
  '.',
  't1',
  '.',
  't1',
  '.',
  't1',
  '.',
  't1',
  '.',
  't1',
  '.',
  't1',
  '.',
  't1',
  '.',
  

In [28]:
count_em = 0
for i in range(len(gt_mat)):
    
    gt = ' '.join(gt_mat[i])
    pred = ' '.join(output_mat[i])
    
    if gt == pred:
        count_em += 1
        print(gt)
        print(pred)
        print(gt == pred)
        print()
        
    else:
        print(gt)
        print(pred)
        print()
        
count_em, count_em/len(gt_mat)

select count ( * ) , competition from match group by competition
select count ( * ) from event

select count ( * ) , competition from match group by competition
select count ( * ) from

select venue from match order by date desc
select date , by sales_billion desc

select venue from match order by date desc
select date from

select gdp from city order by regional_population desc limit 1
select official_name from city from city from city from city from city from city from city from city from city from city from city from city from city from city from city from city from city from city from city from city from city from city from city from city from city from city from city from city from city from city from city from city from city from city from city from city from city from city from city from city from city from city from city from city from city from city from city from city from city from city from city from city from city from city from city from city from city from city from city

(6, 0.008620689655172414)