<a href="https://colab.research.google.com/github/seansphd/ISEA/blob/seansphd-patch-1/Themes_Analysis_ISEA.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
# -*- coding: utf-8 -*-
"""Copy of ISEAV2.ipynb

Automatically generated by Colab.

Original file is located at
    https://colab.research.google.com/drive/1sfUKoXk_mWZW3RV91ySEYRJzr0MXAAjl
"""

# Step 1: Install Required Libraries
# NOTE: the leading "!" is an IPython/Colab shell escape, so this cell only
# runs inside a notebook kernel, not as a plain Python script.
!pip install pandas spacy nltk tqdm

# Step 2: Import Libraries and Load NLP Model
import pandas as pd
import spacy
import nltk
from nltk.corpus import stopwords
from tqdm import tqdm
from google.colab import files
import io  # Used below to wrap the uploaded bytes in a file-like object for pd.read_csv

# Download necessary NLTK data
# (the English stop-word list loaded into `stop_words` further down)
nltk.download('stopwords')

# Download spacy model if not already present
# (shell escape again; the installer output may ask for a runtime restart)
!python -m spacy download en_core_web_sm

# Load Spacy NLP model
# `nlp` and `stop_words` are module-level globals read by the text-processing
# functions defined in Step 4.
nlp = spacy.load("en_core_web_sm")
stop_words = set(stopwords.words('english'))

# Enable Series.progress_apply (used in Step 5) so each pass shows a progress bar.
tqdm.pandas()

# Step 3: Prompt to upload the CSV file
# `uploaded` is indexed by filename below and its values are passed to
# io.BytesIO, i.e. it behaves as a mapping of filename -> raw file bytes.
uploaded = files.upload()

# Load the uploaded file into a DataFrame
# Only the first uploaded file is read; any additional uploads are ignored.
filename = next(iter(uploaded))
df = pd.read_csv(io.BytesIO(uploaded[filename]))

# Verify that the DataFrame has been loaded correctly
# NOTE(review): Step 5 reads df['description'], so the CSV is assumed to have a
# 'description' column -- nothing here checks for it.
print("DataFrame loaded successfully. Here are the first few rows:")
print(df.head())

# Step 4: Define Functions for Text Processing

# Keyword vocabularies, one set per PEST category plus one for art forms.
# Entries are stored exactly as authored (some are capitalised or multi-word).
political_terms = {
    'state', 'politics', 'institution', 'censorship', 'propaganda',
    'activism', 'globalization', 'power dynamics', 'social justice',
    'identity politics', 'war', 'surveillance', 'governance', 'diplomacy',
    'government', 'legislation', 'policy', 'federal', 'legislative',
    'civic tech', 'public policy',
}
economic_terms = {
    'trade', 'capitalism', 'open source', 'license', 'commons', 'finance',
    'commerce', 'Bitcoin', 'Blockchain', 'NFT', 'FLOSS', 'consumerism',
    'gentrification', 'commodification', 'labor', 'sustainability', 'class',
    'funding', 'patronage',
}
social_terms = {
    'equality', 'representation', 'ability', 'gender', 'culture', 'race',
    'sexuality', 'class conflict', 'mental health', 'community', 'access',
    'education', 'cultural studies', 'ethnic', 'community engagement',
    'integration', 'lifestyle', 'social change', 'social norms', 'society',
    'pop culture', 'cultural',
}
technology_terms = {
    '3D printing', 'coding', 'programming', 'digital art', 'AI-generated art',
    'tech', 'technology', 'web', 'interactive art installations', 'VR',
    'digital sculpting', 'digital sculpture', 'information technology',
    'computer', 'augmented reality', 'digital rendering', 'algorithm',
    'algorithmic art', 'procedural art', 'digital fabrication', 'software',
    'cyber', 'digital painting', 'virtual reality', '3D art', 'bio art',
    'early digital art movements', 'glitch art', 'computational',
    'innovation', 'data', 'technological', 'digital', 'machine learning',
    'data art', 'AI', 'generative art', 'network', 'artificial intelligence',
    'hardware', 'net art', 'computer graphics', 'pixel art', 'net',
    'graphics', 'AR', 'cybernetics', 'internet',
}
art_form_terms = {
    'installation', 'panel', 'paper', 'performance', 'net art', 'video',
    'sound', 'digital painting', 'interactive', 'virtual reality',
    'augmented reality', 'algorithmic', '3D printing', 'bio art',
    'environmental', 'light', 'street', 'conceptual', 'electronic',
    'animation', 'photography', 'sculpture', 'mixed media',
    'data visualization', 'kinetic', 'robotic', 'wearable', 'site-specific',
}

def categorize_pest(description):
    """Tag a description with the PEST categories whose vocabulary it mentions.

    The description is lower-cased and run through the module-level spaCy
    pipeline ``nlp``. A category is reported when either

    * one of its single-word terms equals a token lemma, or
    * one of its multi-word / hyphenated terms occurs in the text as a
      whole phrase (word boundaries on both sides).

    Terms are compared case-insensitively: the text is lower-cased before
    matching, so a term stored as ``'NFT'`` or ``'AI'`` could otherwise never
    match anything.

    Args:
        description: Free-text description to classify.

    Returns:
        The matching labels joined with ``';'`` in the fixed order
        Political, Economic, Social, Technological;
        ``'No categories found'`` when nothing matches; or
        ``'Error: <message>'`` when processing raises (for example when
        ``description`` is not a string).
    """
    import re  # function-scope import, as generate_theme does for Counter

    # A fixed label order keeps the joined result identical from run to run;
    # joining a bare set of strings does not guarantee that.
    vocabularies = (
        ('Political', political_terms),
        ('Economic', economic_terms),
        ('Social', social_terms),
        ('Technological', technology_terms),
    )
    try:
        text = description.lower()
        doc = nlp(text)
        lemmas = {token.lemma_.lower() for token in doc}

        categories = []
        for label, terms in vocabularies:
            lowered = {term.lower() for term in terms}

            # Single-word terms: lemma lookup, as before.
            if not lemmas.isdisjoint(lowered):
                categories.append(label)
                continue

            # Multi-word / hyphenated terms span several tokens, so a
            # per-token lemma lookup can never hit them; search the text.
            phrases = (t for t in lowered if ' ' in t or '-' in t)
            if any(re.search(r'\b' + re.escape(p) + r'\b', text) for p in phrases):
                categories.append(label)

        return ';'.join(categories) if categories else 'No categories found'
    except Exception as e:
        # Best-effort per row: surface the failure in the cell instead of
        # aborting the whole DataFrame pass.
        return f"Error: {str(e)}"

def generate_theme(description):
    """
    Summarise a description as its most frequent nouns.

    The text is parsed with the module-level spaCy pipeline ``nlp``; tokens
    tagged ``NOUN`` whose lower-cased text is not in ``stop_words`` are
    counted by their original text, and up to three of the most common are
    returned.

    Returns a ", "-joined string of nouns, "No theme identified" when no
    noun qualifies, or "Error: <message>" if processing raises.
    """
    try:
        from collections import Counter

        parsed = nlp(description)

        # Tally qualifying nouns in document order (ties keep first-seen order).
        noun_tally = Counter()
        for tok in parsed:
            if tok.pos_ != "NOUN":
                continue
            if tok.text.lower() in stop_words:
                continue
            noun_tally[tok.text] += 1

        top_nouns = [noun for noun, _ in noun_tally.most_common(3)]
        if not top_nouns:
            return "No theme identified"
        return ", ".join(top_nouns)
    except Exception as exc:
        return f"Error: {str(exc)}"

def find_art_terms(description, terms=None):
    """List the art-form terms that occur in a description.

    Matching is a case-insensitive substring test: both the description and
    each term are lower-cased before comparison, so a term stored with
    capitals (e.g. ``'3D printing'``) can match. Matched terms are returned
    spelled as they appear in the vocabulary.

    NOTE: because this is a substring test, a short term also matches inside
    longer words (``'light'`` matches ``'highlight'``).

    Args:
        description: Free-text description to scan.
        terms: Optional iterable of terms to look for. Defaults to the
            module-level ``art_form_terms``.

    Returns:
        The matched terms, sorted and joined with ``';'`` (an empty string
        when nothing matches), or ``'Error: <message>'`` when processing
        raises (for example when ``description`` is not a string).
    """
    try:
        text_lower = description.lower()
        vocabulary = art_form_terms if terms is None else terms

        matched = {term for term in vocabulary if term.lower() in text_lower}

        # sorted(): joining a bare set of strings yields an order that can
        # change between interpreter runs, making the output CSV unstable.
        return ';'.join(sorted(matched))
    except Exception as e:
        return f"Error: {str(e)}"

def analyze_interconnectedness_simple(description):
    """
    Keyword-based check for systems-thinking vocabulary in a description.

    The description is lower-cased and scanned for a fixed list of
    interconnectedness keywords using substring matching; no external
    service is called. Hits are reported in the order of the keyword list.

    Returns a sentence listing the keywords found, a fixed "none found"
    sentence otherwise, or "Error: <message>" if processing raises.
    """
    try:
        keywords = (
            'system', 'network', 'connection', 'interaction', 'relationship',
            'flow', 'influence', 'dependency', 'feedback', 'ecosystem',
            'holistic', 'integrated', 'interdependent', 'linked', 'connected',
        )
        lowered = description.lower()

        hits = []
        for keyword in keywords:
            if keyword in lowered:
                hits.append(keyword)

        if not hits:
            return "No explicit interconnectedness concepts found"
        return f"Found interconnectedness concepts: {', '.join(hits)}"
    except Exception as exc:
        return f"Error: {str(exc)}"

# Step 5: Apply Functions to DataFrame in Separate Steps
# Each entry is (progress message, destination column, per-description function);
# the passes run in this order, one progress bar per pass.
analysis_steps = (
    ("Starting PEST categorization...", 'PEST', categorize_pest),
    ("Generating themes...", 'Theme', generate_theme),
    ("Running simple interconnectedness analysis...", 'Interconnectedness_Analysis', analyze_interconnectedness_simple),
    ("Finding art terms...", 'Art Terms', find_art_terms),
)
for step_message, target_column, analyzer in analysis_steps:
    print(step_message)
    df[target_column] = df['description'].progress_apply(analyzer)

# Step 6: Save the Updated DataFrame to a CSV File
# The original columns plus the four new ones are written, without the index.
output_filename = 'updated_file_with_all_entries.csv'
df.to_csv(output_filename, index=False)
print(f"Analysis complete! Results saved to {output_filename}")



[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


Collecting en-core-web-sm==3.8.0
  Downloading https://github.com/explosion/spacy-models/releases/download/en_core_web_sm-3.8.0/en_core_web_sm-3.8.0-py3-none-any.whl (12.8 MB)
[2K     [90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━[0m [32m12.8/12.8 MB[0m [31m100.3 MB/s[0m eta [36m0:00:00[0m
[?25h[38;5;2m✔ Download and installation successful[0m
You can now load the package via spacy.load('en_core_web_sm')
[38;5;3m⚠ Restart to reload dependencies[0m
If you are in a Jupyter or Colab notebook, you may need to restart Python in
order to load all the package's dependencies. You can do this by selecting the
'Restart kernel' or 'Restart runtime' option.


Saving ISEA_Full.csv to ISEA_Full.csv
DataFrame loaded successfully. Here are the first few rows:
                                               title          authors  year  \
0  Epigenetic Painting: Software as Genotype, a N...   Roman Verostko  1988   
1  Some Issues in the Development of Computer Art...   Richard Wright  1988   
2                  Logic and Time-based Art Practice   Ernest Edmonds  1988   
3  Extended Musical Interface with the Human Nerv...  David Rosenboom  1988   
4  Towards a Universal and Intelligent MIDI-Based...  Philippe Menard  1988   

                                                link  \
0  https://www.isea-archives.org/symposia/fisea-1...   
1        https://www.isea-archives.org/?page_id=1248   
2        https://www.isea-archives.org/?page_id=1251   
3        https://www.isea-archives.org/?page_id=1253   
4  https://www.isea-archives.org/symposia/fisea-1...   

                                         description  \
0  Abstract This paper explores is

100%|██████████| 3614/3614 [02:40<00:00, 22.53it/s]


Generating themes...


100%|██████████| 3614/3614 [02:41<00:00, 22.34it/s]


Running simple interconnectedness analysis...


100%|██████████| 3614/3614 [00:00<00:00, 30686.82it/s]


Finding art terms...


100%|██████████| 3614/3614 [00:00<00:00, 18118.38it/s]


Analysis complete! Results saved to updated_file_with_all_entries.csv
