<a href="https://colab.research.google.com/github/lee38667/Artificial-Intelligence-Group-Project/blob/main/Artificial_Intelligence_Group_Project.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

**PART 1**

**Loading the CSV into Dataframes**

**Co author comparison graph**

In [1]:
import csv
import sys
from collections import deque

# Data structures
names = {}
scientists = {}
papers = {}
paper_to_scientists = {}
scientist_to_papers = {}

def load_data(directory):
    # Load scientists
    with open(f"{directory}/scientists.csv", encoding="utf-8") as f:
        reader = csv.DictReader(f)
        for row in reader:
            scientists[row["scientist_id"]] = {
                "name": row["name"],
                "papers": set()
            }
            if row["name"] not in names:
                names[row["name"]] = {row["scientist_id"]}
            else:
                names[row["name"]].add(row["scientist_id"])

    # Load papers
    with open(f"{directory}/papers.csv", encoding="utf-8") as f:
        reader = csv.DictReader(f)
        for row in reader:
            papers[row["paper_id"]] = {
                "title": row["title"],
                "year": row["year"],
                "authors": set()
            }

    # Load authors
    with open(f"{directory}/authors.csv", encoding="utf-8") as f:
        reader = csv.DictReader(f)
        for row in reader:
            try:
                scientist_id = row["scientist_id"]
                paper_id = row["paper_id"]
                scientists[scientist_id]["papers"].add(paper_id)
                papers[paper_id]["authors"].add(scientist_id)

                # Build mappings
                if paper_id not in paper_to_scientists:
                    paper_to_scientists[paper_id] = set()
                paper_to_scientists[paper_id].add(scientist_id)

                if scientist_id not in scientist_to_papers:
                    scientist_to_papers[scientist_id] = set()
                scientist_to_papers[scientist_id].add(paper_id)
            except KeyError:
                continue

def neighbors_for_person(scientist_id):
    neighbors = set()
    for paper_id in scientist_to_papers.get(scientist_id, set()):
        for coauthor_id in paper_to_scientists.get(paper_id, set()):
            if coauthor_id != scientist_id:
                neighbors.add((paper_id, coauthor_id))
    return neighbors

def shortest_path(source, target):
    if source == target:
        return []

    queue = deque()
    queue.append((source, []))
    visited = set()
    visited.add(source)

    while queue:
        current, path = queue.popleft()
        for paper_id, neighbor in neighbors_for_person(current):
            if neighbor == target:
                return path + [(paper_id, neighbor)]
            if neighbor not in visited:
                visited.add(neighbor)
                queue.append((neighbor, path + [(paper_id, neighbor)]))
    return None

def person_id_for_name(name):
    person_ids = list(names.get(name, set()))
    if not person_ids:
        return None
    elif len(person_ids) > 1:
        print(f"Which '{name}'?")
        for pid in person_ids:
            print(f"ID: {pid}, Name: {scientists[pid]['name']}")
        return input("Intended ID: ").strip()
    else:
        return person_ids[0]

def main():
    if len(sys.argv) != 2:
        sys.exit("Usage: python Scientist.py [directory]")
    directory = sys.argv[1]

    print("Loading data...")
    load_data(directory)
    print("Data loaded.")

    source_name = input("Name: ").strip()
    source_id = person_id_for_name(source_name)
    if source_id is None:
        sys.exit("Scientist not found.")

    target_name = input("Name: ").strip()
    target_id = person_id_for_name(target_name)
    if target_id is None:
        sys.exit("Scientist not found.")

    path = shortest_path(source_id, target_id)

    if path is None:
        print("No connection found.")
    else:
        degrees = len(path)
        print(f"{degrees} degrees of separation.")
        current_id = source_id
        for i, (paper_id, scientist_id) in enumerate(path, 1):
            paper = papers[paper_id]
            next_name = scientists[scientist_id]["name"]
            current_name = scientists[current_id]["name"]
            print(f"{i}: {current_name} and {next_name} co-authored \"{paper['title']}\"")
            current_id = scientist_id

if __name__ == "__main__":
    main()


SystemExit: Usage: python Scientist.py [directory]

  warn("To exit: use 'exit', 'quit', or Ctrl-D.", stacklevel=1)


**Part 2 AI Sudoku Puzzle Solver**


In [None]:
class Sudoku_AI_Solver:
    def __init__(self):
        self.domains = {
            (i, j): set(range(1, 10)) for i in range(9) for j in range(9)
        }

    def load_from_file(self, filename):
        """Load Sudoku puzzle from file with error handling to ensure clear instructions"""
        try:
            with open(filename, 'r') as f:
                lines = [line.strip().replace(" ", "") for line in f if line.strip()]

            if len(lines) != 9:
              #ensure there's no empty spaces even though we've added line correction (first issue encountered)
                raise ValueError("File must contain exactly 9 lines")

            for i in range(9):
                if len(lines[i]) != 9:
                    raise ValueError(f"Line {i+1} must contain exactly 9 characters")
                for j in range(9):
                    val = int(lines[i][j])
                    if val == 0:
                        self.domains[(i, j)] = set(range(1, 10))
                    elif 1 <= val <= 9:
                        self.domains[(i, j)] = {val}
                    else:
                        raise ValueError(f"Invalid value {val} at position ({i}, {j})")
        except FileNotFoundError:
            raise FileNotFoundError(f"File {filename} not found")
        except ValueError as e:
            raise ValueError(f"Invalid Sudoku file: {str(e)}")

    def enforce_node_consistency(self):
        for cell in self.domains:
            if len(self.domains[cell]) == 1:
                val = next(iter(self.domains[cell]))
                self.domains[cell] = {val}

    def revise(self, x, y):
        revised = False
        if len(self.domains[y]) == 1:
            val_y = next(iter(self.domains[y]))
            if val_y in self.domains[x]:
                if len(self.domains[x]) > 1:
                    self.domains[x].remove(val_y)
                    revised = True
        return revised

    def ac3(self):
        queue = [(x, y) for x in self.domains for y in self.get_neighbors(x)]
        while queue:
            x, y = queue.pop(0)
            if self.revise(x, y):
                if len(self.domains[x]) == 0:
                    return False
                for z in self.get_neighbors(x):
                    if z != y:
                        queue.append((z, x))
        return True

    def assignment_complete(self, assignment):
        return all(len(assignment[cell]) == 1 for cell in assignment)

    def consistent(self, assignment):
        for cell in assignment:
            if len(assignment[cell]) == 1:
                val = next(iter(assignment[cell]))
                for neighbor in self.get_neighbors(cell):
                    if len(assignment[neighbor]) == 1:
                        if val == next(iter(assignment[neighbor])):
                            return False
        return True

    def order_domain_values(self, var, assignment):
        def count_conflicts(value):
            count = 0
            for neighbor in self.get_neighbors(var):
                if value in assignment[neighbor]:
                    count += 1
            return count
        return sorted(assignment[var], key=count_conflicts)

    def select_unassigned_variable(self, assignment):
        unassigned = [v for v in assignment if len(assignment[v]) > 1]
        return min(unassigned, key=lambda var: (len(assignment[var]), -len(self.get_neighbors(var))))

    def backtrack(self, assignment):
        if self.assignment_complete(assignment):
            return assignment
        var = self.select_unassigned_variable(assignment)
        for value in self.order_domain_values(var, assignment):
            new_assignment = {k: v.copy() for k, v in assignment.items()}
            new_assignment[var] = {value}
            if self.consistent(new_assignment):
                result = self.backtrack(new_assignment)
                if result:
                    return result
        return None

    def get_neighbors(self, cell):
        i, j = cell
        neighbors = set()
        for k in range(9):
            if k != j:
                neighbors.add((i, k))
            if k != i:
                neighbors.add((k, j))
        top_left_i = 3 * (i // 3)
        top_left_j = 3 * (j // 3)
        for a in range(top_left_i, top_left_i + 3):
            for b in range(top_left_j, top_left_j + 3):
                if (a, b) != cell:
                    neighbors.add((a, b))
        return neighbors

    def solve(self):
        self.enforce_node_consistency()
        if not self.ac3():
            print("AC-3 failed")
            return None
        return self.backtrack(self.domains)

    def display(self, assignment):
        for i in range(9):
            if i % 3 == 0 and i != 0:
                print("-" * 21)
            for j in range(9):
                if j % 3 == 0 and j != 0:
                    print("|", end=" ")
                val = next(iter(assignment[(i, j)]))
                print(val if val != 0 else ".", end=" ")
            print()

if __name__ == "__main__":
    solver = Sudoku_AI_Solver()
    try:
        solver.load_from_file("sudoku_easy.txt")
        solution = solver.solve()
        if solution:
            print("Solution found:")
            solver.display(solution)
        else:
            print("No solution found.")
    except Exception as e:
        print(f"Error: {str(e)}")


Solution found:
5 3 4 | 6 7 8 | 9 1 2 
6 7 2 | 1 9 5 | 3 4 8 
1 9 8 | 3 4 2 | 5 6 7 
---------------------
8 5 9 | 7 6 1 | 4 2 3 
4 2 6 | 8 5 3 | 7 9 1 
7 1 3 | 9 2 4 | 8 5 6 
---------------------
9 6 1 | 5 3 7 | 2 8 4 
2 8 7 | 4 1 9 | 6 3 5 
3 4 5 | 2 8 6 | 1 7 9 


# Part **3**

# **Install neccesary libraries**

In [None]:
pip install tensorflow opencv-python scikit-learn



Part 3

In [None]:
import cv2
import numpy as np
import os
import sys
import tensorflow as tf
import matplotlib.pyplot as plt

from sklearn.model_selection import train_test_split
from tensorflow.keras.callbacks import EarlyStopping

EPOCHS = 50  # Let EarlyStopping decide the best epoch
IMG_WIDTH = 30
IMG_HEIGHT = 30
NUM_CATEGORIES = 43
TEST_SIZE = 0.4


def main():
    # Check command-line arguments
    if len(sys.argv) != 2:
        sys.exit("Usage: python traffic.py data_directory")

    # Get image arrays and labels for all image files
    images, labels = load_data(sys.argv[1])

    # Split data into training and testing sets
    labels = tf.keras.utils.to_categorical(labels)
    x_train, x_test, y_train, y_test = train_test_split(
        np.array(images), np.array(labels), test_size=TEST_SIZE
    )

    # Get a compiled neural network
    model = get_model()

    # Early stopping callback
    early_stop = EarlyStopping(monitor='val_loss', patience=3, restore_best_weights=True)

    # Train the model with validation split and early stopping
    history = model.fit(
        x_train, y_train,
        epochs=EPOCHS,
        validation_split=0.2,
        callbacks=[early_stop]
    )

    # Evaluate neural network performance
    model.evaluate(x_test, y_test, verbose=2)

    # Plot training history
    plot_training_history(history)

    # Save model with versioning
    save_model_with_version(model)


def load_data(data_dir):
    images = []
    labels = []

    for label in range(NUM_CATEGORIES):
        label_dir = os.path.join(data_dir, str(label))
        if not os.path.isdir(label_dir):
            continue

        for filename in os.listdir(label_dir):
            filepath = os.path.join(label_dir, filename)
            image = cv2.imread(filepath)

            if image is None:
                continue

            image = cv2.resize(image, (IMG_WIDTH, IMG_HEIGHT))
            images.append(image)
            labels.append(label)

    return images, labels


def get_model():
    model = tf.keras.models.Sequential([
        tf.keras.layers.Conv2D(32, (3, 3), activation='relu', input_shape=(IMG_WIDTH, IMG_HEIGHT, 3)),
        tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),

        tf.keras.layers.Conv2D(64, (3, 3), activation='relu'),
        tf.keras.layers.MaxPooling2D(pool_size=(2, 2)),

        tf.keras.layers.Flatten(),

        tf.keras.layers.Dense(128, activation='relu'),
        tf.keras.layers.Dropout(0.5),

        tf.keras.layers.Dense(NUM_CATEGORIES, activation='softmax')
    ])

    model.compile(
        optimizer='adam',
        loss='categorical_crossentropy',
        metrics=['accuracy']
    )

    return model


def plot_training_history(history):
    acc = history.history['accuracy']
    val_acc = history.history['val_accuracy']
    loss = history.history['loss']
    val_loss = history.history['val_loss']
    epochs_range = range(len(acc))

    plt.figure(figsize=(12, 5))

    plt.subplot(1, 2, 1)
    plt.plot(epochs_range, acc, label='Training Accuracy')
    plt.plot(epochs_range, val_acc, label='Validation Accuracy')
    plt.legend(loc='lower right')
    plt.title('Training vs. Validation Accuracy')

    plt.subplot(1, 2, 2)
    plt.plot(epochs_range, loss, label='Training Loss')
    plt.plot(epochs_range, val_loss, label='Validation Loss')
    plt.legend(loc='upper right')
    plt.title('Training vs. Validation Loss')

    plt.tight_layout()
    plt.show()


def save_model_with_version(model, folder="."):
    """
    Save model to file as modelV#.h5, where # is the next available version number.
    """
    version = 1
    while os.path.exists(os.path.join(folder, f"modelV{version}.h5")):
        version += 1

    filename = os.path.join(folder, f"modelV{version}.h5")
    model.save(filename)
    print(f"Model saved to {filename}.")


if __name__ == "__main__":
    main()



ValueError: zero-size array to reduction operation maximum which has no identity