<a href="https://colab.research.google.com/github/patryklatka/Analysis-and-Databases/blob/main/Regex.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

### In this exercise, I learned the basics of regular expressions and the POSIX regex syntax used by PostgreSQL.

In [9]:
# Connection settings for the course PostgreSQL database.
# NOTE(review): the credentials are hard-coded in the notebook; consider
# reading them from environment variables (Sys.getenv) before sharing it.
dsn_database <- "wbauer_adb_2023"   # Specify the name of  Database
dsn_hostname <- "pgsql-196447.vipserv.org"  # Specify host name
dsn_port <- "5432"                # Specify your port number.
dsn_uid <- "wbauer_adb"         # Specify your username.
dsn_pwd <- "adb2020"        # Specify your password.

# Install the PostgreSQL driver only when it is missing, so re-running the
# cell does not download and reinstall the package every time.
if (!requireNamespace("RPostgres", quietly = TRUE)) {
    install.packages("RPostgres")
}
library(DBI)
library(RPostgres)
library(testthat)

# Shared connection object used by every query below.
con <- dbConnect(
    Postgres(),
    dbname = dsn_database,
    host = dsn_hostname,
    port = dsn_port,
    user = dsn_uid,
    password = dsn_pwd
)

Installing package into ‘/usr/local/lib/R/site-library’
(as ‘lib’ is unspecified)



In [3]:
film_in_category <- function(category)
{
    # Query the title, language and category of every film in one category.
    #
    # The category is selected by:
    # - id:   when `category` is an integer (e.g. 5L)
    # - name: when `category` is a character string; the name must match
    #         exactly (case-sensitive comparison)
    #
    # Example of the resulting table (the "languge" spelling of the second
    # column is the alias used by the query and is kept for compatibility):
    # | |title |languge |category|
    # |0 |Amadeus Holy |English |Action|
    #
    # The resulting table is sorted by film title, then by language name.
    #
    # Parameters:
    # category (integer, character): a single, non-NA category id (integer)
    #     or category name (character).
    #
    # Returns:
    # DataFrame: the result of the query, or NULL when `category` is not a
    #     single non-NA integer or character value.

    # Pick the filter from the argument type; any other type is invalid input.
    if (is.integer(category)) {
        filter <- "film_category.category_id = $1"
    } else if (is.character(category)) {
        filter <- "category.name = $1"
    } else {
        return(NULL)
    }

    # Exactly one usable value is required: a vector or NA cannot be turned
    # into a single query.
    if (length(category) != 1L || is.na(category)) {
        return(NULL)
    }

    # The value is passed as a bound parameter ($1) instead of being pasted
    # into the SQL text. This avoids gluing the value to the next keyword
    # (e.g. "=5ORDER BY"), keeps names containing quotes working and
    # prevents SQL injection.
    query <- paste(
        "SELECT film.title title, language.name as languge, category.name as category",
        "FROM film_category",
        "LEFT JOIN film ON film_category.film_id = film.film_id",
        "LEFT JOIN language ON film.language_id = language.language_id",
        "LEFT JOIN category ON category.category_id = film_category.category_id",
        "WHERE", filter,
        "ORDER BY film.title, language.name"
    )

    dbGetQuery(con, query, params = list(category))
}

In [4]:
film_in_category_case_insensitive <- function(category)
{
    # Query the title, language and category of every film in one category,
    # matching the category name without regard to letter case.
    #
    # The category is selected by:
    # - id:   when `category` is an integer (e.g. 5L)
    # - name: when `category` is a character string; both sides of the
    #         comparison are lower-cased, so "action" matches "Action"
    #
    # Example of the resulting table (the "languge" spelling of the second
    # column is the alias used by the query and is kept for compatibility):
    # | |title |languge |category|
    # |0 |Amadeus Holy |English |Action|
    #
    # The resulting table is sorted by film title, then by language name.
    #
    # Parameters:
    # category (integer, character): a single, non-NA category id (integer)
    #     or category name (character).
    #
    # Returns:
    # DataFrame: the result of the query, or NULL when `category` is not a
    #     single non-NA integer or character value.

    # Pick the filter from the argument type; any other type is invalid input.
    if (is.integer(category)) {
        filter <- "category.category_id = $1"
    } else if (is.character(category)) {
        # The explicit cast makes the type of the bound parameter
        # unambiguous for LOWER().
        filter <- "LOWER(category.name) = LOWER($1::text)"
    } else {
        return(NULL)
    }

    # Exactly one usable value is required: a vector or NA cannot be turned
    # into a single query.
    if (length(category) != 1L || is.na(category)) {
        return(NULL)
    }

    # The value is passed as a bound parameter ($1) instead of being pasted
    # or sprintf-ed into the SQL text. This avoids gluing the id to the next
    # keyword (e.g. "=5ORDER BY"), keeps names containing quotes working and
    # prevents SQL injection. Both lookups share one chain of inner joins;
    # only the WHERE clause differs.
    query <- paste(
        "SELECT film.title AS title, language.name AS languge, category.name AS category",
        "FROM film_category",
        "INNER JOIN film ON film_category.film_id = film.film_id",
        "INNER JOIN language ON film.language_id = language.language_id",
        "INNER JOIN category ON category.category_id = film_category.category_id",
        "WHERE", filter,
        "ORDER BY film.title ASC, language.name ASC"
    )

    dbGetQuery(con, query, params = list(category))
}

In [5]:
film_cast <- function(title)
{
    # Query the cast (actors) of the film whose title matches `title` exactly.
    #
    # Example of the resulting table:
    # | |first_name |last_name |
    # |0 |Greg |Chaplin |
    #
    # The resulting table is sorted by the actor's last name, then first name.
    #
    # Parameters:
    # title (character): a single, non-NA film title; the comparison is an
    #     exact, case-sensitive match.
    #
    # Returns:
    # DataFrame: the result of the query, or NULL when `title` is not a
    #     single non-NA character value.

    if (!is.character(title) || length(title) != 1L || is.na(title)) {
        return(NULL)
    }

    # The title is passed as a bound parameter ($1) rather than pasted into
    # the SQL text, so titles containing quotes work and the query cannot be
    # altered by the input.
    query <- paste(
        "SELECT a.first_name AS first_name, a.last_name AS last_name",
        "FROM actor a",
        "LEFT JOIN film_actor fa ON a.actor_id = fa.actor_id",
        "LEFT JOIN film f ON fa.film_id = f.film_id",
        "WHERE f.title = $1",
        "ORDER BY a.last_name, a.first_name"
    )

    dbGetQuery(con, query, params = list(title))
}

In [6]:
film_title_case_insensitive <- function(words)
{
    # Query the titles of films that contain at least one of the given words,
    # ignoring letter case.
    #
    # A word matches only when it is delimited by a space or by the
    # start/end of the title. Each element of `words` is inserted into the
    # regular expression as-is, so regex metacharacters in it keep their
    # special meaning.
    #
    # Example of the resulting table:
    # | |title |
    # |0 |Crystal Breaking |
    #
    # The resulting table is sorted by film title.
    #
    # Parameters:
    # words (character): a non-empty character vector of words to look for;
    #     it must not contain NA.
    #
    # Returns:
    # DataFrame: the result of the query, or NULL when `words` is not a
    #     non-empty character vector without NA values.

    if (!is.character(words) || length(words) == 0L || anyNA(words)) {
        return(NULL)
    }

    # Join all words into one alternation. Using collapse (instead of
    # comparing each element with the first one) keeps the "|" separator even
    # when a later word repeats the first word.
    alternatives <- paste(words, collapse = "|")
    pattern <- paste0("( |^)(", alternatives, ")( |$)")

    # The pattern is passed as a bound parameter ($1), so quotes inside a
    # word cannot break or alter the SQL statement.
    query <- paste(
        "SELECT film.title AS title",
        "FROM film",
        "WHERE film.title ~* $1",
        "ORDER BY film.title"
    )

    dbGetQuery(con, query, params = list(pattern))
}

**Find all the names of countries beginning with P.**

In [10]:
# Countries whose name starts with an upper-case "P": "^" anchors the match
# at the beginning of the value and "~" is a case-sensitive regex match.
solution <- dbGetQuery(
    conn = con,
    statement = "SELECT country.country
                   FROM country
                   WHERE country ~ '^P'"
)
solution

country
<chr>
Pakistan
Paraguay
Peru
Philippines
Poland
Puerto Rico


**Find all the names of countries beginning with P and ending with s.**

In [11]:
# Countries that start with "P" and end with "s": "^P" anchors the first
# letter, ".*" allows anything in between and "s$" anchors the last letter.
solution <- dbGetQuery(
    conn = con,
    statement = "SELECT country.country
                   FROM country
                   WHERE country ~ '^P.*s$'"
)
solution

country
<chr>
Philippines


**Find all movie titles with digits in them.**

In [17]:
# Film titles containing at least one digit anywhere in the title.
solution <- dbGetQuery(
    conn = con,
    statement = "SELECT film.title
                   FROM film
                   WHERE title ~ '[0-9]'"
)
solution

title
<chr>


**Find all employees with a double (two-part) first name or last name.**

In [13]:
# Staff members with a double (two-part) first or last name.
# "%" is a wildcard only for LIKE; in a POSIX regex ("~") it is a literal
# percent sign, so the pattern looks for a space or hyphen that has a
# character on both sides instead. Both name columns are checked, as the
# task asks for a double first OR last name.
solution <- dbGetQuery(con, "SELECT *
                   FROM staff
                   WHERE first_name ~ '.[ -].'
                      OR last_name ~ '.[ -].'")
solution

staff_id,first_name,last_name,address_id,email,store_id,active,username,password,last_update,picture
<int>,<chr>,<chr>,<int>,<chr>,<int>,<lgl>,<chr>,<chr>,<dttm>,<blob>


**Find all actor last names that start with P or C and are exactly 5 characters long.**

In [14]:
# Actor last names that start with "P" or "C" and are exactly five
# characters long: one leading letter plus four more characters, anchored
# at both ends.
solution <- dbGetQuery(
    conn = con,
    statement = "SELECT last_name
                   FROM actor
                   WHERE last_name ~ '^(P|C).{4}$'"
)
solution

last_name
<chr>
Chase
Posey
Pesci
Crowe
Chase
Close


**Find all movie titles with the words Trip or Alone.**

In [15]:
# Film titles containing "Trip" or "Alone" as a whole word.
# A regex match is unanchored, so wrapping the alternation in ".*" adds
# nothing; without delimiters it would also match inside longer words
# (e.g. "Trips"). Each word must therefore be bounded by a space or by the
# start/end of the title, the same convention film_title_case_insensitive
# uses.
solution <- dbGetQuery(con, "SELECT title
                   FROM film
                   WHERE title ~ '( |^)(Alone|Trip)( |$)'")
solution

title
<chr>
Alone Trip
Superfly Trip
Trip Newton
Varsity Trip
