### This set of functions was written as a part of an exercise in building R packages.

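This function takes in a dataset and a method: "impute" fills in missing values (the mean for numeric columns, the most frequent value for all other columns), while "remove" drops every row that contains a missing value.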

In [1]:
#' Impute or remove missing values in a data frame
#'
#' @param x A data frame (other inputs are coerced with `as.data.frame()`
#'   when imputing).
#' @param method Either "impute" or "remove".
#'   "impute" fills each column's missing values with the column mean
#'   (numeric columns) or the most frequent observed value (all other
#'   columns; ties go to the value that appears first).
#'   "remove" drops every row that contains a missing value.
#' @return `x` with missing values imputed, or with incomplete rows removed.
#'   Columns that are entirely missing are left untouched when imputing,
#'   because there is nothing to estimate a replacement from.
quick_clean <- function(x, method) {
  # Fail loudly on an unknown method instead of printing a message and
  # handing the uncleaned data back to the caller.
  if (length(method) != 1L || is.na(method) ||
        !(method %in% c("impute", "remove"))) {
    stop("please input 'impute' or 'remove' as your method", call. = FALSE)
  }

  impute_column <- function(column) {
    missing <- is.na(column)
    # Nothing to fill, or no observed values to fill with.
    if (!any(missing) || all(missing)) {
      return(column)
    }
    if (is.numeric(column)) {
      column[missing] <- mean(column, na.rm = TRUE)
    } else {
      # Tabulate observed values only, so NA can never win as the "mode".
      observed <- column[!missing]
      candidates <- unique(observed)
      counts <- tabulate(match(observed, candidates))
      column[missing] <- candidates[which.max(counts)]
    }
    column
  }

  if (method == "impute") {
    if (!is.data.frame(x)) {
      x <- as.data.frame(x)
    }
    # Assigning into x[] keeps column names, row names and the data frame
    # class exactly as they were.
    x[] <- lapply(x, impute_column)
    return(x)
  }

  na.exclude(x)
}


This function takes in a training dataset, a testing dataset, and the name of the target variable. It fits a random forest on the training data and returns the model's accuracy on the testing data.

In [2]:
#' Fit a random forest and report its accuracy on held-out data
#'
#' @param train Data frame used to fit the model; must contain column `y`.
#' @param test Data frame used to evaluate the model; must contain column `y`.
#' @param y Name of the target column, as a string.
#' @return A named numeric of length one (`Accuracy`), taken from the
#'   `overall` element of the confusion matrix of predictions against
#'   `test[[y]]`.
quick_model <- function(train, test, y) {
  # Without this check a misspelled target silently skips the rename below
  # and surfaces later as an unrelated formula error.
  if (!(y %in% names(train)) || !(y %in% names(test))) {
    stop(
      "Target column '", y, "' must be present in both `train` and `test`.",
      call. = FALSE
    )
  }

  # Rename the target so a fixed formula can be used for any column name.
  names(train)[names(train) == y] <- "target"
  names(test)[names(test) == y] <- "target"

  # Namespace-qualified calls: nothing is attached to the caller's search
  # path, and confusionMatrix() is resolved even if caret was never loaded.
  model <- ranger::ranger(target ~ ., data = train)
  pred <- predict(model, data = test)$predictions

  # Accuracy does not depend on which class is treated as positive, so the
  # call no longer assumes that a level named "1" exists.
  cm <- caret::confusionMatrix(pred, test$target)
  cm$overall["Accuracy"]
}

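This function takes in a dataset and a number (1, 2, or 3) that selects which set of visualizations to draw: 1 produces dodged bar charts for pairs of categorical columns, 2 produces density plots of each numeric column colored by a categorical column, and 3 produces scatter plots for pairs of numeric columns.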

In [3]:
#' Print a family of exploratory plots for every qualifying column pair
#'
#' @param x A data frame.
#' @param set Which family of plots to draw:
#'   1 = dodged bar charts for each ordered pair of distinct categorical
#'       columns (first column on the x axis, second as fill);
#'   2 = density curves of each numeric column, colored by each categorical
#'       column;
#'   3 = scatter plots for each ordered pair of distinct numeric columns.
#'   "Categorical" means a non-numeric column with fewer than 5 groups.
#'   Any other value prints a usage hint and draws nothing.
#' @return Called for its side effect of printing plots; returns `NULL`
#'   invisibly after plotting.
quick_visual <- function(x, set) {
  col_names <- names(x)

  # nlevels() is 0 for anything that is not a factor, so distinct observed
  # values are counted for character/logical columns instead; otherwise a
  # free-text column would always pass the "< 5" guard.
  is_categorical <- function(column) {
    if (is.numeric(column)) {
      return(FALSE)
    }
    groups <- if (is.factor(column)) {
      nlevels(column)
    } else {
      length(unique(column[!is.na(column)]))
    }
    groups < 5
  }

  # vapply() over the columns works for data frames and tibbles alike and
  # yields empty index sets (so no plots) for a zero-column input.
  numeric_cols <- which(vapply(x, is.numeric, logical(1)))
  categorical_cols <- which(vapply(x, is_categorical, logical(1)))

  # Columns are referenced through the .data pronoun so the mapping cannot
  # be hijacked by a data column that happens to be called `x`.
  bar_chart <- function(a, b) {
    ggplot2::ggplot(x) +
      ggplot2::geom_bar(
        mapping = ggplot2::aes(x = .data[[a]], fill = .data[[b]]),
        position = "dodge"
      ) +
      ggplot2::labs(x = a, fill = b)
  }
  density_chart <- function(a, b) {
    ggplot2::ggplot(data = x) +
      ggplot2::geom_density(
        mapping = ggplot2::aes(x = .data[[a]], color = .data[[b]])
      ) +
      ggplot2::labs(x = a, color = b)
  }
  scatter_chart <- function(a, b) {
    ggplot2::ggplot(data = x) +
      ggplot2::geom_point(
        mapping = ggplot2::aes(x = .data[[a]], y = .data[[b]])
      ) +
      ggplot2::labs(x = a, y = b)
  }

  # Print one chart per (first, second) pair, skipping a column paired with
  # itself; the order matches a nested loop over the columns.
  draw_pairs <- function(first, second, build_chart) {
    for (i in first) {
      for (j in second) {
        if (i != j) {
          print(build_chart(col_names[i], col_names[j]))
        }
      }
    }
    invisible(NULL)
  }

  if (set == 1) {
    draw_pairs(categorical_cols, categorical_cols, bar_chart)
  } else if (set == 2) {
    draw_pairs(numeric_cols, categorical_cols, density_chart)
  } else if (set == 3) {
    draw_pairs(numeric_cols, numeric_cols, scatter_chart)
  } else {
    print("Please input a number between 1 and 3 to recieve graph outputs.")
  }
}