<a href="https://colab.research.google.com/github/tahira4/Project1-File-Processing-System/blob/main/Project_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
# Mount Google Drive (Colab only) so the /content/drive/MyDrive/... paths used
# by the C program below can be opened.
from google.colab import drive
drive.mount('/content/drive')


Mounted at /content/drive


In [None]:
%%writefile word_cou12.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <pthread.h>
#include <time.h> // Include for timing
#include <unistd.h> // Include for usleep
#include <sys/wait.h> // Include for wait
#include <sys/types.h> // Include for fork
#include <sys/ipc.h> // Include for shared memory
#include <sys/shm.h> // Include for shared memory

// Number of entries in files[]; create_process_and_count loops over this many files.
#define MAX_FILES 9
// Size of the per-file thread arrays in process_files_with_threads.
#define MAX_THREADS 9

// Absolute paths of the input files to search (all under the mounted Google Drive).
const char *files[MAX_FILES] = {
    "/content/drive/MyDrive/Colab Notebooks/calgary/paper1",
    "/content/drive/MyDrive/Colab Notebooks/calgary/paper2",
    "/content/drive/MyDrive/Colab Notebooks/calgary/trans",
    "/content/drive/MyDrive/Colab Notebooks/calgary/obj1",
    "/content/drive/MyDrive/Colab Notebooks/calgary/progp",
    "/content/drive/MyDrive/Colab Notebooks/calgary/progl",
    "/content/drive/MyDrive/Colab Notebooks/calgary/bib",
    "/content/drive/MyDrive/Colab Notebooks/calgary/progc",
    "/content/drive/MyDrive/Colab Notebooks/calgary/geo"
    // Ensure all paths are valid
};

typedef struct {
    const char *filename;
    const char *word;
    int count;
} WordCount;

// 1. Function that counts the occurrences of a word in a file using multiple threads
void *count_word_in_file(void *arg) {
    WordCount *wc = (WordCount *)arg;
    FILE *file = fopen(wc->filename, "r");

    if (!file) {
        fprintf(stderr, "Error opening file: %s\n", wc->filename);
        wc->count = 0;  // Set count to 0 if file can't be opened
        return NULL;
    }

    char buffer[1024];
    wc->count = 0;

    // Count occurrences of the word
    while (fgets(buffer, sizeof(buffer), file)) {
        char *ptr = buffer;
        while ((ptr = strstr(ptr, wc->word)) != NULL) {
            wc->count++;
            ptr += strlen(wc->word);
        }
    }

    fclose(file);
    return NULL;
}

// 2. Processing a file using multiple threads

// Slice of one file handed to a single worker thread.
typedef struct {
    const char *filename;  // file to scan
    const char *word;      // byte sequence to look for
    long start;            // offset of the first byte this worker owns
    long end;              // offset one past the last byte this worker owns
    int count;             // output: matches that begin inside [start, end)
} WordSlice;

// Thread entry point: counts the matches of slice->word whose first byte lies
// inside the slice. Up to strlen(word) - 1 bytes past the slice end are read
// so a match straddling two slices is counted exactly once, by the slice it
// starts in. Always returns NULL; the result is left in slice->count.
static void *count_word_in_slice(void *arg) {
    WordSlice *slice = (WordSlice *)arg;
    slice->count = 0;

    const size_t word_len = strlen(slice->word);
    if (word_len == 0 || slice->start >= slice->end) {
        return NULL;  // nothing to search for, or nothing to search in
    }

    FILE *file = fopen(slice->filename, "rb");
    if (!file) {
        fprintf(stderr, "Error opening file: %s\n", slice->filename);
        return NULL;
    }

    const size_t owned = (size_t)(slice->end - slice->start);
    const size_t wanted = owned + word_len - 1;  // slice plus look-ahead
    char *bytes = malloc(wanted);
    size_t got = 0;
    if (bytes != NULL && fseek(file, slice->start, SEEK_SET) == 0) {
        got = fread(bytes, 1, wanted, file);
    } else {
        fprintf(stderr, "Error reading file: %s\n", slice->filename);
    }
    fclose(file);

    size_t pos = 0;
    while (pos < owned && pos + word_len <= got) {
        if (memcmp(bytes + pos, slice->word, word_len) == 0) {
            slice->count++;
            pos += word_len;  // occurrences inside a slice do not overlap
        } else {
            pos++;
        }
    }

    free(bytes);
    return NULL;
}

// Counts the occurrences of `word` in `filename` with up to MAX_THREADS worker
// threads and prints one summary line to stdout.
//
// The file is cut into MAX_THREADS contiguous byte slices and each worker scans
// only its own slice, so the printed total is the count for the file itself
// (not one full count per thread added together).
//   word     - substring to look for; an empty word yields a total of 0.
//   filename - file to scan; if it cannot be opened an error is written to
//              stderr and the summary line reports 0.
// NOTE: a word that can overlap itself (e.g. "aa") may be counted once more
// than a single sequential pass would at a slice boundary.
void process_files_with_threads(const char *word, const char *filename) {
    pthread_t threads[MAX_THREADS];
    WordSlice slices[MAX_THREADS];
    int started[MAX_THREADS];
    long file_size = 0;
    int total_count = 0;

    // Measure the file once so it can be divided between the workers.
    FILE *probe = fopen(filename, "rb");
    if (!probe) {
        fprintf(stderr, "Error opening file: %s\n", filename);
    } else {
        if (fseek(probe, 0, SEEK_END) == 0) {
            long end_offset = ftell(probe);
            if (end_offset > 0) {
                file_size = end_offset;
            }
        }
        fclose(probe);
    }

    const long share = file_size / MAX_THREADS;
    for (int i = 0; i < MAX_THREADS; i++) {
        slices[i].filename = filename;
        slices[i].word = word;
        slices[i].start = share * i;
        // The last worker also takes the remainder of the division.
        slices[i].end = (i == MAX_THREADS - 1) ? file_size : share * (i + 1);
        slices[i].count = 0;

        started[i] = 0;
        if (slices[i].start >= slices[i].end) {
            continue;  // empty slice (tiny or unreadable file): no thread needed
        }

        if (pthread_create(&threads[i], NULL, count_word_in_slice, &slices[i]) == 0) {
            started[i] = 1;
        } else {
            // Could not start a thread: scan this slice on the calling thread
            // instead of losing its matches.
            count_word_in_slice(&slices[i]);
        }
    }

    for (int i = 0; i < MAX_THREADS; i++) {
        if (started[i]) {
            pthread_join(threads[i], NULL);
        }
        total_count += slices[i].count;
    }

    printf("File: %s, Total count of '%s': %d\n", filename, word, total_count);
}

// 3. Measure the performance of the function
void measure_performance(void (*func)(const char *, const char *), const char *word, const char *filename, const char *mode) {
    clock_t start_time = clock();

    func(word, filename);

    clock_t end_time = clock();
    double time_taken = (double)(end_time - start_time) / CLOCKS_PER_SEC;
    printf("Mode: %s, Time: %f seconds\n", mode, time_taken);
}
// Key features 3: Shared memory
// 4. Process creation and multithreading
void create_process_and_count(const char *word) {
    int shmid;
    key_t key = 1234;
    int *shared_memory;
    size_t size = sizeof(int);

    // 5. Set up shared memory for IPC
    if ((shmid = shmget(key, size, IPC_CREAT | 0666)) < 0) {
        perror("shmget");
        exit(1);
    }
    if ((shared_memory = shmat(shmid, NULL, 0)) == (int *)-1) {
        perror("shmat");
        exit(1);
    }

    for (int i = 0; i < MAX_FILES; i++) {
        pid_t pid = fork();  // Create a new process for each file

        if (pid < 0) {
            perror("fork");
            exit(1);
        }
        // Key features 1: Forking
        // Key features 2: Multithreading

        // Process creation: Each child process should be responsible for one file.
        if (pid == 0) {  // Child process
            printf("Child process for file: %s\n", files[i]);

            // Perform word counting using threads
            process_files_with_threads(word, files[i]);

            // 6. Write results to shared memory
            *shared_memory = wc[i].count;

            exit(0);
        }
    }

    // 7. Parent process waits for all children
    for (int i = 0; i < MAX_FILES; i++) {
        wait(NULL);
    }

    // 8. Read and print word count from shared memory
    printf("Total word count (from shared memory): %d\n", *shared_memory);

    // Detach and remove shared memory
    shmdt(shared_memory);
    shmctl(shmid, IPC_RMID, NULL);
}

// Adapter giving create_process_and_count the two-argument shape that
// measure_performance expects; the filename argument is not used because
// create_process_and_count works through the whole files[] list itself.
static void run_multiprocess_count(const char *word, const char *filename) {
    (void)filename;
    create_process_and_count(word);
}

// Program entry point: times one multiprocess search for a fixed word.
// Takes no command-line arguments; the inputs are the files[] array.
int main() {
    const char *word = "the";  // Word to search for

    // 9. Measure the performance for multiprocessing
    printf("=== Multiprocessing with Multithreading ===\n");
    measure_performance(run_multiprocess_count, word, NULL, "Multiprocessing with Multithreading");

    return 0;
}


Overwriting word_cou12.c


# Build and run word_cou12.c

In [None]:
# Compile word_cou12.c (written by the %%writefile cell above) into ./word_cou12, linking against pthreads.

!gcc word_cou12.c -o word_cou12 -lpthread


In [None]:
# Run the compiled program. main() in word_cou12.c is declared without parameters,
# so the directory argument passed here is not read; inputs come from its files[] array.
!./word_cou12 "/content/drive/MyDrive/Colab Notebooks/calgary"

File: /content/drive/MyDrive/Colab Notebooks/calgary/paper1, Word: 'and', Count: 155
File: /content/drive/MyDrive/Colab Notebooks/calgary/paper2, Word: 'and', Count: 224
File: /content/drive/MyDrive/Colab Notebooks/calgary/trans, Word: 'and', Count: 254
File: /content/drive/MyDrive/Colab Notebooks/calgary/obj1, Word: 'and', Count: 0
File: /content/drive/MyDrive/Colab Notebooks/calgary/progp, Word: 'and', Count: 103
File: /content/drive/MyDrive/Colab Notebooks/calgary/progl, Word: 'and', Count: 60
File: /content/drive/MyDrive/Colab Notebooks/calgary/bib, Word: 'and', Count: 383
File: /content/drive/MyDrive/Colab Notebooks/calgary/progc, Word: 'and', Count: 33
File: /content/drive/MyDrive/Colab Notebooks/calgary/geo, Word: 'and', Count: 0
Total word count across files: 1212
Mode: Multithreading, Time: 0.004459 seconds


In [None]:
%%writefile word_count.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <pthread.h>
#include <dirent.h>
#include <sys/types.h>
#include <sys/wait.h>
#include <unistd.h>
// Size of the threads[] / thread_data[] arrays in main().
#define MAX_THREADS 4
// Size in bytes of the token buffer used by count_word.
#define MAX_WORD_LENGTH 100
// List of files to process (short names used in the printed report)
char *files[] = {"bib", "paper1", "paper2", "trans", "progp", "progl", "progc"};
// Path opened for each entry of files[]; main() uses the same index for both arrays.
// The paths are relative, so they resolve against the working directory at run time.
char *file_paths[] = {
    "Project1-File-processing-system/calgary/bib",
    "Project1-File-processing-system/calgary/paper1",
    "Project1-File-processing-system/calgary/paper2",
    "Project1-File-processing-system/calgary/trans",
    "Project1-File-processing-system/calgary/progp",
    "Project1-File-processing-system/calgary/progl",
    "Project1-File-processing-system/calgary/progc"
};

// Struct to pass data to threads
typedef struct {
    char *file_path;
    char *word;
    int word_count;
} thread_data_t;

void *count_word(void *arg) {
    thread_data_t *data = (thread_data_t *)arg;
    FILE *file = fopen(data->file_path, "r");
    if (file == NULL) {
        printf("Error opening file: %s\n", data->file_path);
        return NULL;
    }

    char word[MAX_WORD_LENGTH];
    while (fscanf(file, "%s", word) != EOF) {
        if (strcmp(word, data->word) == 0) {
            data->word_count++;
        }
    }

    fclose(file);
    return NULL;
}

// Program entry point.
//   argv[1] - the word to count in every file listed in files[] / file_paths[].
// Returns 0 on success, 1 on a usage error or if a thread cannot be started.
//
// Files are handled in batches of up to MAX_THREADS, one worker thread per
// file, so every file is read exactly once. (Running several workers over the
// same file and adding their results would multiply each count by the number
// of workers.)
int main(int argc, char *argv[]) {
    if (argc != 2) {
        fprintf(stderr, "Usage: %s <word_to_search>\n", argv[0]);
        return 1;
    }

    char *word_to_search = argv[1];
    pthread_t threads[MAX_THREADS];
    thread_data_t thread_data[MAX_THREADS];

    int num_files = sizeof(files) / sizeof(files[0]);

    for (int first = 0; first < num_files; first += MAX_THREADS) {
        int batch = num_files - first;
        if (batch > MAX_THREADS) {
            batch = MAX_THREADS;
        }

        // Start one worker per file in this batch
        int started = 0;
        for (int j = 0; j < batch; j++) {
            printf("Processing file: %s\n", files[first + j]);

            thread_data[j].file_path = file_paths[first + j];
            thread_data[j].word = word_to_search;
            thread_data[j].word_count = 0;

            // pthread_create reports failure through its return value and
            // does not set errno, so strerror(rc) is used instead of perror.
            int rc = pthread_create(&threads[j], NULL, count_word, &thread_data[j]);
            if (rc != 0) {
                fprintf(stderr, "Failed to create thread: %s\n", strerror(rc));
                break;
            }
            started++;
        }

        // Join threads (only the ones that were actually started)
        for (int j = 0; j < started; j++) {
            pthread_join(threads[j], NULL);
        }

        if (started < batch) {
            return 1;
        }

        // Report the count for each file in this batch
        for (int j = 0; j < batch; j++) {
            printf("File: %s, Word: '%s', Count: %d\n", files[first + j], word_to_search, thread_data[j].word_count);
        }
    }

    return 0;
}


Overwriting word_count.c


In [None]:
# Compile word_count.c (written by the %%writefile cell above) into ./word_count with pthread support.
!gcc -pthread -o word_count word_count.c


In [None]:
# Run the word counter; the single argument is the word to search for.
# "your_search_word" is a placeholder; replace it with a real word.
!./word_count "your_search_word"


Processing file: bib
Error opening file: Project1-File-processing-system/calgary/bib
Error opening file: Project1-File-processing-system/calgary/bib
Error opening file: Project1-File-processing-system/calgary/bib
Error opening file: Project1-File-processing-system/calgary/bib
File: bib, Word: 'your_search_word', Count: 0
Processing file: paper1
Error opening file: Project1-File-processing-system/calgary/paper1
Error opening file: Project1-File-processing-system/calgary/paper1
Error opening file: Project1-File-processing-system/calgary/paper1
Error opening file: Project1-File-processing-system/calgary/paper1
File: paper1, Word: 'your_search_word', Count: 0
Processing file: paper2
Error opening file: Project1-File-processing-system/calgary/paper2
Error opening file: Project1-File-processing-system/calgary/paper2
Error opening file: Project1-File-processing-system/calgary/paper2
Error opening file: Project1-File-processing-system/calgary/paper2
File: paper2, Word: 'your_search_word', Coun