# Identifying Generative AI Applications in Smart City Research

In [2]:
# Import Required Libraries
import json
import pandas as pd
import numpy as np
import re
from collections import Counter
import matplotlib.pyplot as plt
import seaborn as sns
from tqdm.notebook import tqdm
import os
from transformers import pipeline
from matplotlib.ticker import MaxNLocator
import torch
from sentence_transformers import SentenceTransformer, util
import spacy
from transformers import AutoModelForSequenceClassification, AutoTokenizer
from sklearn.metrics import classification_report, confusion_matrix
import json
import re
from collections import Counter
import torch.nn.functional as F

# Notebook-wide display configuration: widen pandas text columns so long
# titles/abstracts stay readable in rendered tables.
pd.options.display.max_colwidth = 100

# Apply the seaborn theme first; the font overrides below are applied after it
# so that they are the values left in rcParams.
sns.set(style="whitegrid")
plt.rcParams.update({"font.size": 12, "font.family": "Times New Roman"})

# Turn off parallelism in the tokenizers library for this process
# (presumably to avoid its fork-related warnings — confirm if still needed).
os.environ.update(TOKENIZERS_PARALLELISM="false")

# Load and Preprocess the Data

In [3]:
# Read the JSON export produced by the previous pipeline step (search results
# enriched with introductions and contributions) into plain Python objects.
with open(
    "../data/04_document_search_results_with_intros_and_contributions.json",
    mode="r",
    encoding="utf8",
) as handle:
    abstracts_data = json.load(handle)

print(f"Loaded {len(abstracts_data)} records")

# Wrap the records in a DataFrame so later cells can add per-paper columns.
df = pd.DataFrame(abstracts_data)

# Preview the first three rows to check the column layout.
df.head(3)

Loaded 580 records


Unnamed: 0,title,authors,journal,doi,publication_date,document_type,prism:url,scopus_id,abstract,author_keywords,subject_areas,introduction,contribution
0,An interactive address matching method based on a graph attention mechanism,Li M.,International Journal of Cognitive Computing in Engineering,10.1016/j.ijcce.2024.12.003,2025,Article,https://api.elsevier.com/content/abstract/scopus_id/85213870668,85213870668,Problem: Modernizing and standardizing place names and addresses is a key challenge in the devel...,"[Address matching, Attention-based feature interaction method, Directed graph, Interactive addre...","[Information Systems, Engineering (miscellaneous), Computer Science Applications, Information Sy...",Problem: Modernizing and standardizing place names and addresses is a key challenge in the devel...,"Purpose: This paper proposes a solution to address matching challenges, such as incomplete descr..."
1,Intelligent pattern design using 3D modelling technology for urban sculpture designing,Wan W.,Systems and Soft Computing,10.1016/j.sasc.2024.200176,2025,Article,https://api.elsevier.com/content/abstract/scopus_id/85212000320,85212000320,3D modeling is actuality hired more and more by cities to improve urban planning and cultural pr...,"[3D modelling, Urban sculpture designing, 3D-SAE (3D-Sculpture Analysis and Estimation), Convolu...","[Software, Theoretical Computer Science, Computer Science Applications, Computational Theory and...",3D modeling is actuality hired more and more by cities to improve urban planning and cultural pr...,3D modeling is actuality hired more and more by cities to improve urban planning and cultural pr...
2,GeoAvatar: A big mobile phone positioning data-driven method for individualized pseudo personal ...,Li P.,"Computers, Environment and Urban Systems",10.1016/j.compenvurbsys.2025.102252,2025,Article,https://api.elsevier.com/content/abstract/scopus_id/86000553754,86000553754,"The importance of personal mobility data is widely recognized in various fields. However, the ut...","[Big mobility data, Generative model, GIS, Mahince learning, Smart City]","[Geography, Planning and Development, Ecological Modeling, Environmental Science (all), Urban St...","The importance of personal mobility data is widely recognized in various fields. However, the ut...","Our method utilizes a deep generative model to generate heterogeneous individual life patterns, ..."


# Initialize Sentence Classifier

In [4]:
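# (Disabled) Earlier zero-shot baseline, kept for reference only.
# NOTE(review): `DEVICE` is not defined in the cells shown here — define it before re-enabling.
# # Initialize zero-shot classifier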
# print("Initializing zero-shot classifier...")
# classifier = pipeline(
#     "zero-shot-classification",
#     model="MoritzLaurer/deberta-v3-large-zeroshot-v2.0-c",
#     device=DEVICE,
# )
# print("Zero-shot classifier initialized")

In [5]:
import json
import torch
import torch.nn.functional as F
from transformers import AutoModelForSequenceClassification, AutoTokenizer

class GenAIClassifier:
    """Wrapper around a fine-tuned sequence-classification checkpoint.

    The wrapper loads the model and a tokenizer, moves the model to the best
    available device (CUDA, then MPS, then CPU) and exposes ``predict`` for
    single-text inference with temperature-scaled softmax probabilities.

    The decision rule in ``predict`` treats class index 0 as the "GenAI" class
    and class index 1 as the alternative, i.e. it assumes a binary model.

    Args:
        model_path: Directory or hub id passed to
            ``AutoModelForSequenceClassification.from_pretrained``.
        config_path: Path to a JSON file; its ``"id2label"`` mapping (string
            class ids -> label names) is used to name the classes.
        temperature: Positive divisor applied to the logits before softmax.
            Values below 1 sharpen the distribution, values above 1 flatten it.
        tokenizer_name: Checkpoint the tokenizer is loaded from. Defaults to
            the base model name that was previously hard-coded.
        max_length: Token length every input is truncated/padded to.

    Raises:
        ValueError: If ``temperature`` is not strictly positive or
            ``max_length`` is smaller than 1.
    """

    def __init__(self, model_path, config_path, temperature=1.0,
                 tokenizer_name="microsoft/deberta-v3-base", max_length=256):
        # Validate cheap arguments first so a bad value fails before any
        # weights are loaded. ``not x > 0`` also rejects NaN.
        if not temperature > 0:
            raise ValueError(
                f"temperature must be a positive number, got {temperature!r}"
            )
        if max_length < 1:
            raise ValueError(f"max_length must be >= 1, got {max_length!r}")

        # Load model
        self.model = AutoModelForSequenceClassification.from_pretrained(model_path)

        # Pick the device, load the tokenizer and place the model
        self.device = self._select_device()
        self.tokenizer = AutoTokenizer.from_pretrained(tokenizer_name)
        self.model.to(self.device)
        # Make inference mode explicit (disables dropout); harmless if the
        # model is already in eval mode.
        self.model.eval()

        # Load config
        with open(config_path, 'r', encoding='utf-8') as f:
            self.config = json.load(f)

        self.max_length = max_length
        self.id2label = self.config.get("id2label", {})
        self.temperature = temperature  # temperature scaling factor

    @staticmethod
    def _select_device():
        """Return the preferred torch device: CUDA, then MPS, then CPU."""
        if torch.cuda.is_available():
            return torch.device("cuda")
        # ``torch.backends.mps`` does not exist on older torch builds.
        mps_backend = getattr(torch.backends, "mps", None)
        if mps_backend is not None and mps_backend.is_available():
            return torch.device("mps")
        return torch.device("cpu")

    def predict(self, text, threshold=0.5):
        """Classify a single text.

        Args:
            text: Input passed to the tokenizer.
            threshold: Minimum probability of class 0 required to predict
                class 0; otherwise class 1 is predicted.

        Returns:
            dict with keys:
                ``predicted_label``: label name of the chosen class (falls
                    back to ``"Class <id>"`` when ``id2label`` has no entry),
                ``probabilities``: mapping label name -> probability for
                    every class,
                ``threshold``: the threshold that was applied,
                ``genai_confidence``: probability of class 0.
        """
        # Tokenize (fixed-length padding, truncated to max_length)
        inputs = self.tokenizer(
            text,
            truncation=True,
            padding='max_length',
            max_length=self.max_length,
            return_tensors='pt'
        ).to(self.device)

        # Forward pass without gradients, then temperature-scaled softmax
        with torch.no_grad():
            outputs = self.model(**inputs)
            logits = outputs.logits / self.temperature
            probabilities = F.softmax(logits, dim=-1)
            # First (and only) row of the batch as plain Python floats
            probs = probabilities[0].cpu().tolist()

        # Thresholded decision on class 0 rather than a plain argmax
        genai_prob = probs[0]
        predicted_class_id = 0 if genai_prob >= threshold else 1
        predicted_label = self.id2label.get(str(predicted_class_id), f"Class {predicted_class_id}")

        return {
            'predicted_label': predicted_label,
            'probabilities': {
                self.id2label.get(str(i), f"Class {i}"): float(prob)
                for i, prob in enumerate(probs)
            },
            'threshold': threshold,
            'genai_confidence': float(genai_prob)
        }

In [6]:
# Instantiate the fine-tuned classifier from the saved checkpoint directory.
classifier = GenAIClassifier(
    model_path="../model/output/best_model",
    config_path="../model/output/best_model/config.json",
    # NOTE(review): 0.602 looks like a calibrated softmax temperature — confirm where it came from.
    temperature=0.602,
)



In [7]:
def _empty_genai_classification():
    """Return a fresh negative result with the same keys as a successful one."""
    return {
        "is_genai_application": False,
        "top_label": None,
        "score": 0.0,
        "all_labels": [],
        "all_scores": [],
    }


def classify_genai_application(text, threshold=0.65, model=None):
    """
    Uses GenAIClassifier to determine if text describes a GenAI application

    Args:
        text (str): Text to classify. Non-string or blank input is treated as
            "not a GenAI application" without calling the classifier.
        threshold (float): Minimum GenAI probability forwarded to
            ``predict``; defaults to the previously hard-coded 0.65.
        model: Object exposing ``predict(text, threshold=...)``. When omitted,
            the module-level ``classifier`` is used.

    Returns:
        dict: Always contains the keys ``is_genai_application`` (bool),
        ``top_label`` (str or None), ``score`` (float), ``all_labels`` (list)
        and ``all_scores`` (list). ``score`` is the probability of the
        *predicted* label, so for a thresholded "not GenAI" decision it can be
        below 0.5.
    """
    if not isinstance(text, str) or not text.strip():
        return _empty_genai_classification()

    try:
        # Resolved inside the try block so a missing global classifier is
        # reported like any other classification error.
        active_model = classifier if model is None else model
        result = active_model.predict(text, threshold=threshold)

        predicted_label = result['predicted_label']
        label_probabilities = result['probabilities']

        return {
            # Substring match against the positive class label
            "is_genai_application": "GenAI used for smart city application" in predicted_label,
            "top_label": predicted_label,
            "score": label_probabilities.get(predicted_label, 0.0),
            "all_labels": list(label_probabilities.keys()),
            "all_scores": list(label_probabilities.values()),
        }
    except Exception as e:
        # Best-effort: a failure on one record must not abort the whole run.
        print(f"Error in classification: {e}")
        return _empty_genai_classification()

# Apply the Classifier to Paper Contributions

In [8]:
# Run classify_genai_application over every paper's "contribution" text with a
# tqdm progress bar.
# NOTE(review): the progress label still says "zero-shot" although the
# classifier used here is the GenAIClassifier instance — consider renaming.
tqdm.pandas(desc="Applying zero-shot classification")
df["genai_classification"] = df["contribution"].progress_apply(classify_genai_application)

# Flatten the two fields needed downstream into their own columns.
df["is_genai_application"] = [
    outcome["is_genai_application"] for outcome in df["genai_classification"]
]
df["classification_score"] = [
    outcome.get("score", 0) for outcome in df["genai_classification"]
]

# Summarise how many records were flagged, as a count and a percentage.
genai_count = df["is_genai_application"].sum()
genai_share = df["is_genai_application"].mean() * 100
print(f"Abstracts classified as GenAI applications: {genai_count} ({genai_share:.2f}%)")

Applying zero-shot classification:   0%|          | 0/580 [00:00<?, ?it/s]

Abstracts classified as GenAI applications: 307 (52.93%)


In [9]:
# Show the frame including the classification columns added above
# (the rendered output elides the middle rows).
df

Unnamed: 0,title,authors,journal,doi,publication_date,document_type,prism:url,scopus_id,abstract,author_keywords,subject_areas,introduction,contribution,genai_classification,is_genai_application,classification_score
0,An interactive address matching method based on a graph attention mechanism,Li M.,International Journal of Cognitive Computing in Engineering,10.1016/j.ijcce.2024.12.003,2025,Article,https://api.elsevier.com/content/abstract/scopus_id/85213870668,85213870668,Problem: Modernizing and standardizing place names and addresses is a key challenge in the devel...,"[Address matching, Attention-based feature interaction method, Directed graph, Interactive addre...","[Information Systems, Engineering (miscellaneous), Computer Science Applications, Information Sy...",Problem: Modernizing and standardizing place names and addresses is a key challenge in the devel...,"Purpose: This paper proposes a solution to address matching challenges, such as incomplete descr...","{'is_genai_application': False, 'top_label': 'Not related', 'score': 0.7041826844215393, 'all_la...",False,0.704183
1,Intelligent pattern design using 3D modelling technology for urban sculpture designing,Wan W.,Systems and Soft Computing,10.1016/j.sasc.2024.200176,2025,Article,https://api.elsevier.com/content/abstract/scopus_id/85212000320,85212000320,3D modeling is actuality hired more and more by cities to improve urban planning and cultural pr...,"[3D modelling, Urban sculpture designing, 3D-SAE (3D-Sculpture Analysis and Estimation), Convolu...","[Software, Theoretical Computer Science, Computer Science Applications, Computational Theory and...",3D modeling is actuality hired more and more by cities to improve urban planning and cultural pr...,3D modeling is actuality hired more and more by cities to improve urban planning and cultural pr...,"{'is_genai_application': True, 'top_label': 'GenAI used for smart city application', 'score': 0....",True,0.937137
2,GeoAvatar: A big mobile phone positioning data-driven method for individualized pseudo personal ...,Li P.,"Computers, Environment and Urban Systems",10.1016/j.compenvurbsys.2025.102252,2025,Article,https://api.elsevier.com/content/abstract/scopus_id/86000553754,86000553754,"The importance of personal mobility data is widely recognized in various fields. However, the ut...","[Big mobility data, Generative model, GIS, Mahince learning, Smart City]","[Geography, Planning and Development, Ecological Modeling, Environmental Science (all), Urban St...","The importance of personal mobility data is widely recognized in various fields. However, the ut...","Our method utilizes a deep generative model to generate heterogeneous individual life patterns, ...","{'is_genai_application': True, 'top_label': 'GenAI used for smart city application', 'score': 0....",True,0.870384
3,Demystifying SAR with attention,Patnaik N.,Expert Systems with Applications,10.1016/j.eswa.2025.127182,2025,Article,https://api.elsevier.com/content/abstract/scopus_id/86000797212,86000797212,"Synthetic Aperture Radar (SAR) imagery is indispensable for earth observation, offering the abil...","[Attention, Deep learning, Generative adversarial networks, Image colorization, Image restoratio...","[Engineering (all), Computer Science Applications, Artificial Intelligence]","Synthetic Aperture Radar (SAR) imagery is indispensable for earth observation, offering the abil...","This study introduces an innovative framework for SAR image colorization, leveraging an Attentio...","{'is_genai_application': True, 'top_label': 'GenAI used for smart city application', 'score': 0....",True,0.901335
4,STV AE : Skip connection driven Two-stream property fusion Variational AutoEncoder for cross-reg...,Li Y.,Information Fusion,10.1016/j.inffus.2025.102960,2025,Article,https://api.elsevier.com/content/abstract/scopus_id/85216015347,85216015347,Wastewater treatment plant (WWTP) plays a crucial role in achieving social sustainable developme...,"[Cross-region semantic segmentation, Domain adaptation, Property fusion, Remote sensing, Wastewa...","[Software, Signal Processing, Information Systems, Hardware and Architecture]",Wastewater treatment plant (WWTP) plays a crucial role in achieving social sustainable developme...,"In this paper, we propose a Skip connection driven Two-stream property fusion Variational AutoEn...","{'is_genai_application': False, 'top_label': 'Not related', 'score': 0.626494824886322, 'all_lab...",False,0.626495
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
575,"Macro-level traffic safety analysis in Shanghai, China",Wang X.,Accident Analysis and Prevention,10.1016/j.aap.2019.02.014,2019,Article,https://api.elsevier.com/content/abstract/scopus_id/85061832713,85061832713,"Continuing rapid growth in Shanghai, China, requires traffic safety to be considered at the earl...","[Bayesian conditional autoregressive model, Macro-level safety modeling, Traffic analysis zone, ...","[Human Factors and Ergonomics, Safety, Risk, Reliability and Quality, Public Health, Environment...","Continuing rapid growth in Shanghai, China, requires traffic safety to be considered at the earl...",This study developed a macro-level safety model for 263 traffic analysis zones (TAZs) within the...,"{'is_genai_application': True, 'top_label': 'GenAI used for smart city application', 'score': 0....",True,0.712830
576,Geographical area network-structural health monitoring utility computing model,Tariq H.,ISPRS International Journal of Geo-Information,10.3390/ijgi8030154,2019,Article,https://api.elsevier.com/content/abstract/scopus_id/85063745308,85063745308,In view of intensified disasters and fatalities caused by natural phenomena and geographical exp...,"[Geographical Area Network (GAN), Internet of Things (IoT), Structural Health Monitoring (SHM), ...","[Geography, Planning and Development, Computers in Earth Sciences, Earth and Planetary Sciences ...",In view of intensified disasters and fatalities caused by natural phenomena and geographical exp...,This paper proposes a novel utility computing model (UCM) for structural health monitoring (SHM)...,"{'is_genai_application': True, 'top_label': 'GenAI used for smart city application', 'score': 0....",True,0.906132
577,Interaction effects between technology-driven urbanization and eco-environment: Evidence from Ch...,Gu G.,Sustainability (Switzerland),10.3390/su11030836,2019,Article,https://api.elsevier.com/content/abstract/scopus_id/85061155807,85061155807,"With the rapid pace of urbanization in populous regions, the conflict between economic developme...","[Eco-environment, Interaction effects, Technology-driven, U-shape, Urbanization]","[Computer Science (miscellaneous), Geography, Planning and Development, Renewable Energy, Sustai...","With the rapid pace of urbanization in populous regions, the conflict between economic developme...","Choosing the East Zhejiang region in China as a case study, this paper reveals the interaction e...","{'is_genai_application': False, 'top_label': 'Not related', 'score': 0.683832585811615, 'all_lab...",False,0.683833
578,Spatio-temporal crime predictions in smart cities: A data-driven approach and experiments,Catlett C.,Pervasive and Mobile Computing,10.1016/j.pmcj.2019.01.003,2019,Article,https://api.elsevier.com/content/abstract/scopus_id/85060488293,85060488293,Steadily increasing urbanization is causing significant economic and social transformations in u...,"[Crime prediction, Data analytics, Smart city, Urban computing]","[Software, Information Systems, Hardware and Architecture, Computer Science Applications, Comput...",Steadily increasing urbanization is causing significant economic and social transformations in u...,This paper presents a predictive approach based on spatial analysis and auto-regressive models t...,"{'is_genai_application': False, 'top_label': 'Not related', 'score': 0.4661978781223297, 'all_la...",False,0.466198


# Save the Results

In [10]:
# Keep only the rows flagged as GenAI applications.
results = df.loc[df["is_genai_application"]]

# Persist the filtered rows as a JSON array of records. DataFrame.to_json is
# used because json.dump cannot serialise a DataFrame directly.
results.to_json(
    "../data/05_filtered_genai_applications.json",
    orient="records",
    indent=4,
)

# Report how many records went in and how many passed the filter.
print(f"Total records: {len(df)}")
print(f"Filtered records: {len(results)}")

Total records: 580
Filtered records: 307
