<a href="https://colab.research.google.com/github/ryandoyle5401/OS_Project1/blob/main/proj1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Seven processes reading seven files - Works

In [20]:
%%writefile proj1.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/wait.h>
#include <sys/time.h>
#include <sys/resource.h>

#define MAX_WORDS 50
#define MAX_WORD_LENGTH 20
#define NUM_OF_PROCESSES 7

int main() {
    // Store paths to all 7 files
    char *file_paths[] = {
        "/content/drive/MyDrive/Colab Notebooks/txtfiles/bib",
        "/content/drive/MyDrive/Colab Notebooks/txtfiles/paper1",
        "/content/drive/MyDrive/Colab Notebooks/txtfiles/paper2",
        "/content/drive/MyDrive/Colab Notebooks/txtfiles/progc",
        "/content/drive/MyDrive/Colab Notebooks/txtfiles/progl",
        "/content/drive/MyDrive/Colab Notebooks/txtfiles/progp",
        "/content/drive/MyDrive/Colab Notebooks/txtfiles/trans"
    };

    // Create pipes for each process
    int pipes[NUM_OF_PROCESSES][2];
    for (int i = 0; i < NUM_OF_PROCESSES; i++) {
        if (pipe(pipes[i]) == -1) {
            perror("failed to create pipe");
            exit(1);
        }
    }

    // Keywords to keep track of
    char keywords[MAX_WORDS][MAX_WORD_LENGTH] = {
        "and", "as", "at", "be", "but", "by", "for", "if", "in", "into",
        "is", "it", "no", "not", "of", "on", "or", "such", "that", "the",
        "their", "then", "there", "these", "they", "this", "to", "was",
        "will", "with", "a", "an", "are", "can", "could", "have", "me",
        "my", "you", "your", "we", "were", "good", "do", "from", "what",
        "so", "like", "just", "some"
    };

    // Array to hold frequencies for each keyword
    int frequencies[MAX_WORDS] = {0};

    // Start time measurement
    struct timeval start, end;
    gettimeofday(&start, NULL);  // Record the start time

    pid_t pid[NUM_OF_PROCESSES];
    for (int i = 0; i < NUM_OF_PROCESSES; i++) {
        pid[i] = fork();
        if (pid[i] == -1) {
            perror("failed to create child process");
            exit(1);
        } else if (pid[i] == 0) {
            // Child process
            FILE *file_ptr = fopen(file_paths[i], "r");
            if (file_ptr == NULL) {
                perror("failed to open file");
                exit(1);
            }

            int local_frequencies[MAX_WORDS] = {0}; // Local frequencies for this process
            char str[256];

            while (fgets(str, sizeof(str), file_ptr) != NULL) {
                // Tokenize the string
                char *token = strtok(str, " \n\t.,;:!?");
                while (token != NULL) {
                    // Check each keyword against the token
                    for (int j = 0; j < MAX_WORDS; j++) {
                        if (strcmp(token, keywords[j]) == 0) {
                            local_frequencies[j]++;
                        }
                    }
                    token = strtok(NULL, " \n\t.,;:!?");
                }
            }
            fclose(file_ptr);

            // Close the read end of the pipe in the child
            close(pipes[i][0]);
            // Send the frequencies to the parent process via the pipe
            write(pipes[i][1], local_frequencies, sizeof(local_frequencies));
            close(pipes[i][1]);  // Close the write end after sending
            exit(0);  // Child process exits after completion
        }
    }

    // Parent process: collect counts from children
    for (int i = 0; i < NUM_OF_PROCESSES; i++) {
        // Close the write end of the pipe in the parent
        close(pipes[i][1]);

        int child_frequencies[MAX_WORDS];
        // Read the frequencies from the child
        read(pipes[i][0], child_frequencies, sizeof(child_frequencies));

        // Accumulate frequencies in the parent
        for (int j = 0; j < MAX_WORDS; j++) {
            frequencies[j] += child_frequencies[j];
        }

        // Close the read end after reading
        close(pipes[i][0]);
    }

    // Waiting for child processes to finish
    int status;
    pid_t wpid;
    int processes_remaining = NUM_OF_PROCESSES;

    while (processes_remaining > 0) {
        wpid = wait(&status);
        if (wpid > 0) {  // If a child process has terminated
            processes_remaining--;
        }
    }


    // End time measurement
    gettimeofday(&end, NULL);  // Record the end time

    // Calculate elapsed time in seconds and microseconds
    long seconds = end.tv_sec - start.tv_sec;
    long microseconds = end.tv_usec - start.tv_usec;
    long total_time = (seconds * 1000000) + microseconds;

    // Get resource usage
    struct rusage usage;
    getrusage(RUSAGE_CHILDREN, &usage); // Get resource usage for child processes

    // Display frequencies of keywords
    printf("Frequencies of keywords:\n");
    for (int j = 0; j < MAX_WORDS; j++) {
        if (frequencies[j] > 0) {  // Only print if the frequency is greater than zero
            printf("'%s': %d\n", keywords[j], frequencies[j]);
        }
    }

    printf("Total execution time: %ld microseconds.\n", total_time);
    printf("User CPU time used: %ld microseconds.\n", usage.ru_utime.tv_sec * 1000000 + usage.ru_utime.tv_usec);
    printf("System CPU time used: %ld microseconds.\n", usage.ru_stime.tv_sec * 1000000 + usage.ru_stime.tv_usec);
    printf("Maximum resident set size: %ld kilobytes.\n", usage.ru_maxrss);

    return EXIT_SUCCESS;
}


Overwriting proj1.c


In [21]:
%%shell
gcc proj1.c -o proj1
./proj1

Frequencies of keywords:
'and': 862
'as': 189
'at': 84
'be': 325
'but': 58
'by': 188
'for': 465
'if': 415
'in': 650
'into': 57
'is': 528
'it': 356
'no': 65
'not': 120
'of': 1192
'on': 263
'or': 108
'such': 30
'that': 282
'the': 1568
'their': 45
'then': 245
'there': 34
'these': 24
'they': 56
'this': 183
'to': 783
'was': 65
'will': 131
'with': 217
'a': 783
'an': 158
'are': 150
'can': 134
'could': 63
'have': 80
'me': 16
'my': 38
'you': 100
'your': 39
'we': 58
'were': 16
'good': 12
'do': 140
'from': 116
'what': 29
'so': 59
'like': 43
'just': 44
'some': 55
Total execution time: 50574 microseconds.
User CPU time used: 81631 microseconds.
System CPU time used: 24466 microseconds.
Maximum resident set size: 20632 kilobytes.




# Single child process reading a single file - Works

In [10]:
%%writefile proj1.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/wait.h>
#include <sys/time.h>
#include <sys/resource.h>

#define MAX_WORDS 50
#define MAX_WORD_LENGTH 20
#define NUM_OF_PROCESSES 1

int main() {
    // Store paths to file
    char *file_paths[] = {
        "/content/drive/MyDrive/Colab Notebooks/txtfiles/bib"
    };

    // Create pipes for each process
    int pipes[NUM_OF_PROCESSES][2];
    for (int i = 0; i < NUM_OF_PROCESSES; i++) {
        if (pipe(pipes[i]) == -1) {
            perror("failed to create pipe");
            exit(1);
        }
    }

    // Keywords to keep track of
    char keywords[MAX_WORDS][MAX_WORD_LENGTH] = {
        "and", "as", "at", "be", "but", "by", "for", "if", "in", "into",
        "is", "it", "no", "not", "of", "on", "or", "such", "that", "the",
        "their", "then", "there", "these", "they", "this", "to", "was",
        "will", "with", "a", "an", "are", "can", "could", "have", "me",
        "my", "you", "your", "we", "were", "good", "do", "from", "what",
        "so", "like", "just", "some"
    };

    // Array to hold frequencies for each keyword
    int frequencies[MAX_WORDS] = {0};

    // Start time measurement
    struct timeval start, end;
    gettimeofday(&start, NULL);  // Record the start time

    pid_t pid[NUM_OF_PROCESSES];
    for (int i = 0; i < NUM_OF_PROCESSES; i++) {
        pid[i] = fork();
        if (pid[i] == -1) {
            perror("failed to create child process");
            exit(1);
        } else if (pid[i] == 0) {
            // Child process
            FILE *file_ptr = fopen(file_paths[i], "r");
            if (file_ptr == NULL) {
                perror("failed to open file");
                exit(1);
            }

            int local_frequencies[MAX_WORDS] = {0}; // Local frequencies for this process
            char str[256];

            while (fgets(str, sizeof(str), file_ptr) != NULL) {
                // Tokenize the string
                char *token = strtok(str, " \n\t.,;:!?");
                while (token != NULL) {
                    // Check each keyword against the token
                    for (int j = 0; j < MAX_WORDS; j++) {
                        if (strcmp(token, keywords[j]) == 0) {
                            local_frequencies[j]++;
                        }
                    }
                    token = strtok(NULL, " \n\t.,;:!?");
                }
            }
            fclose(file_ptr);

            // Close the read end of the pipe in the child
            close(pipes[i][0]);
            // Send the frequencies to the parent process via the pipe
            write(pipes[i][1], local_frequencies, sizeof(local_frequencies));
            close(pipes[i][1]);  // Close the write end after sending
            exit(0);  // Child process exits after completion
        }
    }

    // Parent process: collect counts from children
    for (int i = 0; i < NUM_OF_PROCESSES; i++) {
        // Close the write end of the pipe in the parent
        close(pipes[i][1]);

        int child_frequencies[MAX_WORDS];
        // Read the frequencies from the child
        read(pipes[i][0], child_frequencies, sizeof(child_frequencies));

        // Accumulate frequencies in the parent
        for (int j = 0; j < MAX_WORDS; j++) {
            frequencies[j] += child_frequencies[j];
        }

        // Close the read end after reading
        close(pipes[i][0]);
    }

    // Waiting for child processes to finish
    int status;
    pid_t wpid;
    int processes_remaining = NUM_OF_PROCESSES;

    while (processes_remaining > 0) {
        wpid = wait(&status);
        if (wpid > 0) {  // If a child process has terminated
            processes_remaining--;
        }
    }


    // End time measurement
    gettimeofday(&end, NULL);  // Record the end time

    // Calculate elapsed time in seconds and microseconds
    long seconds = end.tv_sec - start.tv_sec;
    long microseconds = end.tv_usec - start.tv_usec;
    long total_time = (seconds * 1000000) + microseconds;

    // Get resource usage
    struct rusage usage;
    getrusage(RUSAGE_CHILDREN, &usage); // Get resource usage for child processes

    // Display frequencies of keywords
    printf("Frequencies of keywords:\n");
    for (int j = 0; j < MAX_WORDS; j++) {
        if (frequencies[j] > 0) {  // Only print if the frequency is greater than zero
            printf("'%s': %d\n", keywords[j], frequencies[j]);
        }
    }

    printf("Total execution time: %ld microseconds.\n", total_time);
    printf("User CPU time used: %ld microseconds.\n", usage.ru_utime.tv_sec * 1000000 + usage.ru_utime.tv_usec);
    printf("System CPU time used: %ld microseconds.\n", usage.ru_stime.tv_sec * 1000000 + usage.ru_stime.tv_usec);
    printf("Maximum resident set size: %ld kilobytes.\n", usage.ru_maxrss);

    return EXIT_SUCCESS;
}


Overwriting proj1.c


In [16]:
%%shell
gcc proj1.c -o proj1
./proj1

Frequencies of keywords:
'and': 256
'as': 12
'at': 4
'but': 1
'by': 20
'for': 132
'in': 153
'into': 2
'is': 6
'it': 2
'of': 372
'on': 86
'or': 4
'that': 4
'the': 146
'their': 1
'there': 1
'to': 57
'will': 1
'with': 23
'a': 124
'an': 44
'are': 2
'have': 4
'your': 2
'we': 1
'from': 13
'what': 3
'some': 4
Total execution time: 18646 microseconds.
User CPU time used: 76560 microseconds.
System CPU time used: 22121 microseconds.
Maximum resident set size: 20564 kilobytes.




# No child process, two threads created to read one file - Works

In [22]:
%%writefile proj1.c

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/wait.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <pthread.h>

#define MAX_WORDS 50
#define MAX_WORD_LENGTH 20
#define MAX_LINE_LENGTH 256

// Structure to hold thread arguments
typedef struct {
    char *file_path;         // Path to the file
    int *local_frequencies;  // Frequencies for this thread
    int start_line;          // Start line for this thread
    int end_line;            // End line for this thread
    pthread_mutex_t *mutex;  // Mutex for synchronization
} thread_args_t;

// Keywords to search for
char keywords[MAX_WORDS][MAX_WORD_LENGTH] = {
    "and", "as", "at", "be", "but", "by", "for", "if", "in", "into",
    "is", "it", "no", "not", "of", "on", "or", "such", "that", "the",
    "their", "then", "there", "these", "they", "this", "to", "was",
    "will", "with", "a", "an", "are", "can", "could", "have", "me",
    "my", "you", "your", "we", "were", "good", "do", "from", "what",
    "so", "like", "just", "some"
};

// Thread function to read a portion of the file and count keyword frequencies
void *read_file(void *args) {
    thread_args_t *t_args = (thread_args_t *)args;
    FILE *file_ptr = fopen(t_args->file_path, "r");
    if (file_ptr == NULL) {
        perror("failed to open file");
        pthread_exit(NULL);
    }

    char str[MAX_LINE_LENGTH];
    int line_count = 0;

    pthread_mutex_lock(t_args->mutex); // Lock before updating shared data
    // Read the file line by line and only process the assigned lines
    while (fgets(str, sizeof(str), file_ptr) != NULL) {
        if (line_count >= t_args->start_line && line_count < t_args->end_line) {
            // Tokenize the string
            char *token = strtok(str, " \n\t.,;:!?");
            //pthread_mutex_lock(t_args->mutex); // Lock before updating shared data
            while (token != NULL) {
              //pthread_mutex_lock(t_args->mutex); // Lock before updating shared data
                // Check each keyword against the token
                for (int j = 0; j < MAX_WORDS; j++) {
                    if (strcmp(token, keywords[j]) == 0) {
                        //pthread_mutex_lock(t_args->mutex); // Lock before updating shared data
                        t_args->local_frequencies[j]++;
                        //pthread_mutex_unlock(t_args->mutex); // Unlock after updating
                    }
                }
                // Retrieves the next word in the string
                token = strtok(NULL, " \n\t.,;:!?");
                //pthread_mutex_unlock(t_args->mutex); // Unlock after updating
            }
            //pthread_mutex_unlock(t_args->mutex); // Unlock after updating
        }
        line_count++;
    }
    pthread_mutex_unlock(t_args->mutex); // Unlock after updating

    fclose(file_ptr);
    pthread_exit(NULL);
}

int main() {
    // Path to the single file
    char *file_path = "/content/drive/MyDrive/Colab Notebooks/txtfiles/bib";

    // Array to hold frequencies for each keyword
    int frequencies[MAX_WORDS] = {0};

    // Start time measurement
    struct timeval start, end;
    gettimeofday(&start, NULL);  // Record the start time

    // Open the file to determine the total number of lines
    FILE *file_ptr = fopen(file_path, "r");
    if (file_ptr == NULL) {
        perror("failed to open file");
        return EXIT_FAILURE;
    }

    // Count total lines in the file
    int line_count = 0;
    char str[MAX_LINE_LENGTH];
    while (fgets(str, sizeof(str), file_ptr) != NULL) {
        line_count++;
    }
    fclose(file_ptr);

    // Divide the file into two parts (half lines for each thread)
    int mid_line = line_count / 2;

    // Thread management
    pthread_t threads[2];
    pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; // Mutex for synchronization

    // Thread arguments for the two threads
    thread_args_t args1 = {file_path, frequencies, 0, mid_line, &mutex};
    thread_args_t args2 = {file_path, frequencies, mid_line, line_count, &mutex};

    // Create two threads to read different halves of the file and count keyword frequencies
    pthread_create(&threads[0], NULL, read_file, (void *)&args1);
    pthread_create(&threads[1], NULL, read_file, (void *)&args2);

    // Wait for both threads to finish
    pthread_join(threads[0], NULL);
    pthread_join(threads[1], NULL);

    // End time measurement
    gettimeofday(&end, NULL);  // Record the end time

    // Calculate elapsed time in seconds and microseconds
    long seconds = end.tv_sec - start.tv_sec;
    long microseconds = end.tv_usec - start.tv_usec;
    long total_time = (seconds * 1000000) + microseconds;

    // Get resource usage
    struct rusage usage;
    getrusage(RUSAGE_CHILDREN, &usage); // Get resource usage for child processes

    // Display frequencies of keywords
    printf("Frequencies of keywords:\n");
    for (int j = 0; j < MAX_WORDS; j++) {
        if (frequencies[j] > 0) {  // Only print if the frequency is greater than zero
            printf("'%s': %d\n", keywords[j], frequencies[j]);
        }
    }

    printf("Total execution time: %ld microseconds.\n", total_time);
    printf("User CPU time used: %ld microseconds.\n", usage.ru_utime.tv_sec * 1000000 + usage.ru_utime.tv_usec);
    printf("System CPU time used: %ld microseconds.\n", usage.ru_stime.tv_sec * 1000000 + usage.ru_stime.tv_usec);
    printf("Maximum resident set size: %ld kilobytes.\n", usage.ru_maxrss);

    return EXIT_SUCCESS;
}


Overwriting proj1.c


In [23]:
%%shell
gcc proj1.c -o proj1
./proj1

Frequencies of keywords:
'and': 256
'as': 12
'at': 4
'but': 1
'by': 20
'for': 132
'in': 153
'into': 2
'is': 6
'it': 2
'of': 372
'on': 86
'or': 4
'that': 4
'the': 146
'their': 1
'there': 1
'to': 57
'will': 1
'with': 23
'a': 124
'an': 44
'are': 2
'have': 4
'your': 2
'we': 1
'from': 13
'what': 3
'some': 4
Total execution time: 23925 microseconds.
User CPU time used: 74198 microseconds.
System CPU time used: 20065 microseconds.
Maximum resident set size: 20880 kilobytes.




# Single child process creating two threads to read a single file. - Does not Work Correctly

In [17]:
%%writefile proj1.c

#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/wait.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <pthread.h>

#define MAX_WORDS 50
#define MAX_WORD_LENGTH 20
#define MAX_LINE_LENGTH 256

// Structure to hold thread arguments
typedef struct {
    char *file_path;         // Path to the file
    int *local_frequencies;  // Frequencies for this thread
    int start_line;          // Start line for this thread
    int end_line;            // End line for this thread
    pthread_mutex_t *mutex;  // Mutex for synchronization
} thread_args_t;

// Keywords to search for
char keywords[MAX_WORDS][MAX_WORD_LENGTH] = {
    "and", "as", "at", "be", "but", "by", "for", "if", "in", "into",
    "is", "it", "no", "not", "of", "on", "or", "such", "that", "the",
    "their", "then", "there", "these", "they", "this", "to", "was",
    "will", "with", "a", "an", "are", "can", "could", "have", "me",
    "my", "you", "your", "we", "were", "good", "do", "from", "what",
    "so", "like", "just", "some"
};

// Thread function to read a portion of the file and count keyword frequencies
void *read_file(void *args) {
    thread_args_t *t_args = (thread_args_t *)args;
    FILE *file_ptr = fopen(t_args->file_path, "r");
    if (file_ptr == NULL) {
        perror("failed to open file");
        pthread_exit(NULL);
    }

    char str[MAX_LINE_LENGTH];
    int line_count = 0;

    // Read the file line by line and only process the assigned lines
    while (fgets(str, sizeof(str), file_ptr) != NULL) {
        if (line_count >= t_args->start_line && line_count < t_args->end_line) {
            // Tokenize the string
            char *token = strtok(str, " \n\t.,;:!?");
            while (token != NULL) {
                // Check each keyword against the token
                for (int j = 0; j < MAX_WORDS; j++) {
                    if (strcmp(token, keywords[j]) == 0) {
                        pthread_mutex_lock(t_args->mutex); // Lock before updating shared data
                        t_args->local_frequencies[j]++;
                        pthread_mutex_unlock(t_args->mutex); // Unlock after updating
                    }
                }
                // Retrieves the next word in the string
                token = strtok(NULL, " \n\t.,;:!?");
            }
        }
        line_count++;
    }

    fclose(file_ptr);
    pthread_exit(NULL);
}

int main() {
    // Path to the single file
    char *file_path = "/content/drive/MyDrive/Colab Notebooks/txtfiles/bib";

    // Create a pipe
    int pipe_fd[2];
    if (pipe(pipe_fd) == -1) {
        perror("pipe failed");
        return EXIT_FAILURE;
    }

    // Start time measurement
    struct timeval start, end;
    gettimeofday(&start, NULL);  // Record the start time

    // Create a child process
    pid_t pid = fork();
    if (pid == -1) {
        perror("failed to create child process");
        return EXIT_FAILURE;
    } else if (pid == 0) {
        // Child process

        // Close the read end of the pipe
        close(pipe_fd[0]);

        // Array to hold frequencies for each keyword
        int frequencies[MAX_WORDS] = {0};

        // Open the file to determine the total number of lines
        FILE *file_ptr = fopen(file_path, "r");
        if (file_ptr == NULL) {
            perror("failed to open file");
            return EXIT_FAILURE;
        }

        // Count total lines in the file
        int line_count = 0;
        char str[MAX_LINE_LENGTH];
        while (fgets(str, sizeof(str), file_ptr) != NULL) {
            line_count++;
        }
        fclose(file_ptr);

        // Divide the file into two parts (half lines for each thread)
        int mid_line = line_count / 2;

        // Thread management
        pthread_t threads[2];
        pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; // Mutex for synchronization

        // Thread arguments for the two threads
        thread_args_t args1 = {file_path, frequencies, 0, mid_line, &mutex};
        thread_args_t args2 = {file_path, frequencies, mid_line, line_count, &mutex};

        // Create two threads to read different halves of the file and count keyword frequencies
        pthread_create(&threads[0], NULL, read_file, (void *)&args1);
        pthread_create(&threads[1], NULL, read_file, (void *)&args2);

        // Wait for both threads to finish
        pthread_join(threads[0], NULL);
        pthread_join(threads[1], NULL);

        // Send the frequencies to the parent process through the pipe
        write(pipe_fd[1], frequencies, sizeof(int) * MAX_WORDS);

        // Close the write end of the pipe
        close(pipe_fd[1]);

        exit(0); // Child process exits after sending the data
    } else {
        // Parent process

        // Close the write end of the pipe
        close(pipe_fd[1]);

        // Wait for the child process to finish
        wait(NULL);

        // End time measurement
        gettimeofday(&end, NULL);  // Record the end time

        // Calculate elapsed time in seconds and microseconds
        long seconds = end.tv_sec - start.tv_sec;
        long microseconds = end.tv_usec - start.tv_usec;
        long total_time = (seconds * 1000000) + microseconds;

        // Get resource usage
        struct rusage usage;
        getrusage(RUSAGE_CHILDREN, &usage); // Get resource usage for child processes

        // Array to hold the received frequencies from the child
        int frequencies[MAX_WORDS] = {0};

        // Read the frequencies from the pipe
        read(pipe_fd[0], frequencies, sizeof(int) * MAX_WORDS);

        // Close the read end of the pipe
        close(pipe_fd[0]);

        // Display frequencies of keywords
        printf("Frequencies of keywords:\n");
        for (int j = 0; j < MAX_WORDS; j++) {
            if (frequencies[j] > 0) {  // Only print if the frequency is greater than zero
                printf("'%s': %d\n", keywords[j], frequencies[j]);
            }
        }

        printf("Total execution time: %ld microseconds.\n", total_time);
        printf("User CPU time used: %ld microseconds.\n", usage.ru_utime.tv_sec * 1000000 + usage.ru_utime.tv_usec);
        printf("System CPU time used: %ld microseconds.\n", usage.ru_stime.tv_sec * 1000000 + usage.ru_stime.tv_usec);
        printf("Maximum resident set size: %ld kilobytes.\n", usage.ru_maxrss);
    }

    return EXIT_SUCCESS;
}


Overwriting proj1.c


In [19]:
%%shell
gcc proj1.c -o proj1
./proj1

Frequencies of keywords:
'and': 175
'as': 9
'at': 3
'by': 17
'for': 86
'in': 103
'into': 1
'is': 3
'of': 260
'on': 53
'or': 2
'that': 5
'the': 104
'their': 1
'there': 1
'to': 38
'will': 1
'with': 15
'a': 93
'an': 31
'are': 2
'have': 3
'your': 1
'from': 6
'what': 1
'some': 3
Total execution time: 15550 microseconds.
User CPU time used: 62273 microseconds.
System CPU time used: 19345 microseconds.
Maximum resident set size: 21008 kilobytes.




# Seven processes creating two threads - Does Not Work Correctly

In [None]:
%%writefile proj1.c
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <sys/wait.h>
#include <sys/time.h>
#include <sys/resource.h>
#include <pthread.h>

#define MAX_WORDS 50
#define MAX_WORD_LENGTH 20
#define NUM_OF_PROCESSES 7
#define MAX_LINE_LENGTH 256

// Structure to hold thread arguments
typedef struct {
    char *file_path;         // Path to the file
    int *local_frequencies;  // Frequencies for this thread
    int start_line;          // Start line for this thread
    int end_line;            // End line for this thread
    pthread_mutex_t *mutex;  // Mutex for synchronization
} thread_args_t;

char keywords[MAX_WORDS][MAX_WORD_LENGTH] = {
    "and", "as", "at", "be", "but", "by", "for", "if", "in", "into",
    "is", "it", "no", "not", "of", "on", "or", "such", "that", "the",
    "their", "then", "there", "these", "they", "this", "to", "was",
    "will", "with", "a", "an", "are", "can", "could", "have", "me",
    "my", "you", "your", "we", "were", "good", "do", "from", "what",
    "so", "like", "just", "some"
};

// Thread function to read a portion of the file
void *read_file(void *args) {
    thread_args_t *t_args = (thread_args_t *)args;
     pthread_mutex_lock(t_args->mutex); // Lock before updating shared data
    FILE *file_ptr = fopen(t_args->file_path, "r");
    if (file_ptr == NULL) {
        perror("failed to open file");
        pthread_exit(NULL);
    }

    char str[MAX_LINE_LENGTH];
    int line_count = 0;


    // Read the file line by line and only process the assigned lines
    // Count lines in the file
    while (fgets(str, sizeof(str), file_ptr) != NULL) {
        line_count++;
    }
    rewind(file_ptr); // Reset the file pointer to the beginning

    // Read the file for the specified portion
    int target_start = t_args->start_line;
    int target_end = t_args->end_line;

    for (int i = 0; i < line_count; i++) {
        fgets(str, sizeof(str), file_ptr);
        if (i < target_start) {
            continue;
        }
        if (i >= target_end) {
            break;
        }

        // Tokenize the string
        char *token = strtok(str, " \n\t.,;:!?");
        while (token != NULL) {
            // Check each keyword against the token
            for (int j = 0; j < MAX_WORDS; j++) {
                if (strcmp(token, keywords[j]) == 0) {
                    //pthread_mutex_lock(t_args->mutex); // Lock the mutex
                    t_args->local_frequencies[j]++;
                    //pthread_mutex_unlock(t_args->mutex); // Unlock the mutex
                }
            }
            token = strtok(NULL, " \n\t.,;:!?");
        }
    }
    pthread_mutex_unlock(t_args->mutex); // Unlock after updating

    fclose(file_ptr);
    pthread_exit(NULL);
}

int main() {
    // Store paths to files
    char *file_paths[] = {
        "/content/drive/MyDrive/Colab Notebooks/txtfiles/bib",
        "/content/drive/MyDrive/Colab Notebooks/txtfiles/paper1",
        "/content/drive/MyDrive/Colab Notebooks/txtfiles/paper2",
        "/content/drive/MyDrive/Colab Notebooks/txtfiles/progc",
        "/content/drive/MyDrive/Colab Notebooks/txtfiles/progl",
        "/content/drive/MyDrive/Colab Notebooks/txtfiles/progp",
        "/content/drive/MyDrive/Colab Notebooks/txtfiles/trans"
    };

    // Array to hold frequencies for each keyword
    int frequencies[MAX_WORDS] = {0};

    // Start time measurement
    struct timeval start, end;
    gettimeofday(&start, NULL);  // Record the start time

    pthread_t threads[NUM_OF_PROCESSES][2]; // Two threads for each process
    pthread_mutex_t mutex = PTHREAD_MUTEX_INITIALIZER; // Mutex for synchronization
    int local_frequencies[NUM_OF_PROCESSES][MAX_WORDS]; // Local frequencies for each process

    // Create child processes
    pid_t pid[NUM_OF_PROCESSES];
    for (int i = 0; i < NUM_OF_PROCESSES; i++) {
        pid[i] = fork();
        if (pid[i] == -1) {
            perror("failed to create child process");
            exit(1);
        } else if (pid[i] == 0) {
            // Child process
            FILE *file_ptr = fopen(file_paths[0], "r");
            if (file_ptr == NULL) {
                perror("failed to open file");
                exit(1);
            }

            // Count total lines in the file
            int line_count = 0;
            char str[MAX_LINE_LENGTH];
            while (fgets(str, sizeof(str), file_ptr) != NULL) {
                line_count++;
            }
            rewind(file_ptr); // Reset the file pointer to the beginning

            // Create thread arguments for the first and second halves of the file
            thread_args_t args1 = {file_paths[0], local_frequencies[i], 0, line_count / 2, &mutex};
            thread_args_t args2 = {file_paths[0], local_frequencies[i], line_count / 2, line_count, &mutex};

            // Create threads to read the first and second halves
            pthread_create(&threads[i][0], NULL, read_file, (void *)&args1);
            pthread_create(&threads[i][1], NULL, read_file, (void *)&args2);

            // Wait for threads to finish
            pthread_join(threads[i][0], NULL);
            pthread_join(threads[i][1], NULL);

            fclose(file_ptr);
            exit(0);  // Child process exits after completion
        }
    }

    // Parent process: collect counts from children
    for (int i = 0; i < NUM_OF_PROCESSES; i++) {
        // Wait for child processes to finish
        wait(NULL);

        // Accumulate local frequencies in the parent
        for (int j = 0; j < MAX_WORDS; j++) {
            frequencies[j] += local_frequencies[i][j];
        }
    }

    // End time measurement
    gettimeofday(&end, NULL);  // Record the end time

    // Calculate elapsed time in seconds and microseconds
    long seconds = end.tv_sec - start.tv_sec;
    long microseconds = end.tv_usec - start.tv_usec;
    long total_time = (seconds * 1000000) + microseconds;

    // Get resource usage
    struct rusage usage;
    getrusage(RUSAGE_CHILDREN, &usage); // Get resource usage for child processes

    // Display frequencies of keywords
    printf("Frequencies of keywords:\n");
    for (int j = 0; j < MAX_WORDS; j++) {
        if (frequencies[j] > 0) {  // Only print if the frequency is greater than zero
            printf("'%s': %d\n", keywords[j], frequencies[j]);
        }
    }

    printf("Total execution time: %ld microseconds.\n", total_time);
    printf("User CPU time used: %ld microseconds.\n", usage.ru_utime.tv_sec * 1000000 + usage.ru_utime.tv_usec);
    printf("System CPU time used: %ld microseconds.\n", usage.ru_stime.tv_sec * 1000000 + usage.ru_stime.tv_usec);
    printf("Maximum resident set size: %ld kilobytes.\n", usage.ru_maxrss);

    return EXIT_SUCCESS;
}


In [None]:
%%shell
gcc proj1.c -o proj1
./proj1